SYMBOL INDEX (105 symbols across 27 files) FILE: ml_ids/data/dataset.py function remove_inf_values (line 12) | def remove_inf_values(df: pd.DataFrame) -> pd.DataFrame: function remove_negative_values (line 25) | def remove_negative_values(df: pd.DataFrame, ignore_cols: List[str] = No... function add_label_category_column (line 45) | def add_label_category_column(df: pd.DataFrame) -> pd.DataFrame: function add_label_is_attack_columns (line 56) | def add_label_is_attack_columns(df: pd.DataFrame) -> pd.DataFrame: function load_dataset_generic (line 68) | def load_dataset_generic(load_df_fn, function load_dataset (line 107) | def load_dataset(dataset_path: str, function load_dataset_hdf (line 139) | def load_dataset_hdf(dataset_path: str, FILE: ml_ids/data/split_dataset.py function split_dataset (line 34) | def split_dataset(dataset_path, output_path, val_size, test_size, nrows,... function remove_extra_labels (line 58) | def remove_extra_labels(dataset: pd.DataFrame): function save_dataset (line 67) | def save_dataset(dataset: pd.DataFrame, path: str, ds_type: str): FILE: ml_ids/keras/callbacks.py class OneCycleScheduler (line 11) | class OneCycleScheduler(callbacks.Callback): method __init__ (line 16) | def __init__(self, iterations, max_rate, start_rate=None, method _interpolate (line 26) | def _interpolate(self, iter1, iter2, rate1, rate2): method on_batch_begin (line 30) | def on_batch_begin(self, batch, logs): FILE: ml_ids/keras/evaluation.py function evaluate_model (line 7) | def evaluate_model(model, X_train, y_train, X_val, y_val, metric_title): FILE: ml_ids/keras/metrics.py class AveragePrecisionScoreMetric (line 14) | class AveragePrecisionScoreMetric(callbacks.Callback): method __init__ (line 19) | def __init__(self, X_val, y_val, batch_size=4096): method get_precision_score (line 25) | def get_precision_score(self): method on_epoch_end (line 36) | def on_epoch_end(self, epoch, logs): FILE: ml_ids/keras/model_selection.py function cross_val_train (line 11) | def cross_val_train(fit_fn, FILE: ml_ids/keras/prediction.py function predict (line 7) | def predict(model, X, decision_boundary=0.5): function predict_proba (line 22) | def predict_proba(model, X): FILE: ml_ids/libs/dfencoder/dataframe.py class EncoderDataFrame (line 36) | class EncoderDataFrame(pd.DataFrame): method __init__ (line 37) | def __init__(self, *args, **kwargs): method swap (line 40) | def swap(self, likelihood=.15): FILE: ml_ids/model_selection.py function train_val_test_split (line 11) | def train_val_test_split(df: pd.DataFrame, function split_x_y (line 46) | def split_x_y(df: pd.DataFrame, y_cols: List[str] = None) -> Tuple[pd.Da... function best_precision_for_target_recall (line 60) | def best_precision_for_target_recall(y_true, y_pred_score, target_recall): FILE: ml_ids/models/gradient_boost/mlflow_wrapper.py class CatBoostWrapper (line 10) | class CatBoostWrapper(mlflow.pyfunc.PythonModel): method load_context (line 15) | def load_context(self, context): method preprocess (line 28) | def preprocess(self, data): method predict (line 40) | def predict(self, context, model_input): FILE: ml_ids/models/gradient_boost/train.py function fit_pipeline (line 21) | def fit_pipeline(train_dataset): function preprocess_val_dataset (line 41) | def preprocess_val_dataset(pipeline, val_dataset): function preprocess_train_dataset (line 55) | def preprocess_train_dataset(pipeline, train_dataset, nr_attack_samples,... function calculate_class_weights (line 76) | def calculate_class_weights(y_train): function train_gb_classifier (line 87) | def train_gb_classifier(train_pool, function train_model (line 127) | def train_model(train_dataset: pd.DataFrame, FILE: ml_ids/prediction.py function predict_proba_positive (line 6) | def predict_proba_positive(clf, X): function predict_decision_boundary (line 18) | def predict_decision_boundary(clf, X, decision_boundary=0.5): FILE: ml_ids/tf_utils.py function enable_gpu_memory_growth (line 7) | def enable_gpu_memory_growth(): FILE: ml_ids/transform/preprocessing.py function remove_outliers (line 15) | def remove_outliers(df: pd.DataFrame, zscore: int = 3) -> pd.DataFrame: function create_pipeline (line 27) | def create_pipeline(df: pd.DataFrame, FILE: ml_ids/transform/sampling.py function upsample_minority_classes (line 10) | def upsample_minority_classes(X: np.ndarray, function create_sample_dict (line 45) | def create_sample_dict(df: pd.DataFrame, function downsample (line 69) | def downsample(df: pd.DataFrame, FILE: ml_ids/visualization.py function plot_hist (line 14) | def plot_hist(hist, function plot_confusion_matrix (line 49) | def plot_confusion_matrix(y_true, function identity (line 120) | def identity(x): function plot_threshold (line 127) | def plot_threshold(pred_train, pred_val, threshold, size=(15, 5), transf... function get_misclassifications (line 147) | def get_misclassifications(y, y_true, pred): function print_binary_performance (line 165) | def print_binary_performance(y, y_true, pred, print_misclassifications=T... function plot_pr_curve (line 195) | def plot_pr_curve(y_true, y_score, size=(8, 5), average='weighted'): function plot_pr_curves (line 219) | def plot_pr_curves(y_true, y_score_dict, size=(8, 5), average='weighted'): function plot_pr_threshold_curves (line 244) | def plot_pr_threshold_curves(y_true, y_pred_score, size=(20, 8)): FILE: models/gradient_boost/envs/local/train.py function merge (line 8) | def merge(dict1, dict2): function train (line 31) | def train(train_path, val_path, test_path, output_path, param_path): FILE: models/gradient_boost/envs/sagemaker/container/train.py function merge (line 25) | def merge(dict1, dict2): FILE: models/gradient_boost/envs/sagemaker/scripts/deploy.py function unpack (line 13) | def unpack(file): function deploy (line 34) | def deploy(config_path, job_id): FILE: models/gradient_boost/envs/sagemaker/scripts/train.py function create_performance_metric_regex (line 10) | def create_performance_metric_regex(id): function create_metric_def (line 20) | def create_metric_def(name, regex): function get_metric_definitions (line 30) | def get_metric_definitions(): function train (line 55) | def train(config_path, param_path, image_name, mode, job_id): FILE: models/gradient_boost/envs/sagemaker/scripts/undeploy.py function undeploy (line 9) | def undeploy(config_path): FILE: models/gradient_boost/project/train.py function load_dataset (line 22) | def load_dataset(path): function load_train_val_test_dataset (line 33) | def load_train_val_test_dataset(train_path, val_path, test_path): function measure_performance (line 44) | def measure_performance(clf, pipeline, dataset): function save_artifacts (line 68) | def save_artifacts(cbm_model_path, classifier, pipeline_path, pipeline, ... function train (line 109) | def train(train_path, FILE: notebooks/05_anomaly_detection/notebook_utils.py function predict (line 10) | def predict(model, X, y): function evaluate_pr_roc (line 17) | def evaluate_pr_roc(pred): function plot_evaluation_curves (line 23) | def plot_evaluation_curves(pred): function plot_pr_threshold_curves (line 45) | def plot_pr_threshold_curves(pred, pr_plot_lim=[0, 1]): function best_precision_for_target_recall (line 68) | def best_precision_for_target_recall(pred, target_recall): function get_misclassifications (line 73) | def get_misclassifications(y, pred_binary): function print_performance (line 83) | def print_performance(y, pred, threshold): function filter_benign (line 101) | def filter_benign(X, y): FILE: notebooks/06_dl_classifier/notebook_utils.py function transform_data (line 9) | def transform_data(dataset, FILE: notebooks/07_binary_classifier_comparison/notebook_utils.py function get_best_model_path (line 9) | def get_best_model_path(trials, model_path_var='model_path'): function print_trial_results (line 13) | def print_trial_results(trials, best_run, model_path_var='model_path'): function transform_data (line 23) | def transform_data(dataset, FILE: tests/data/test_dataset.py function val_data (line 10) | def val_data(): function inf_value_count (line 15) | def inf_value_count(df): function neg_value_count (line 19) | def neg_value_count(df): function nan_value_count (line 25) | def nan_value_count(df): function negative_value_columns (line 29) | def negative_value_columns(df): function test_loaded_dataset_must_not_contain_inf_values (line 34) | def test_loaded_dataset_must_not_contain_inf_values(): function test_loaded_dataset_must_not_contain_negative_values (line 40) | def test_loaded_dataset_must_not_contain_negative_values(): function test_loaded_dataset_must_not_contain_negative_values_except_excluded_cols (line 46) | def test_loaded_dataset_must_not_contain_negative_values_except_excluded... function test_loaded_dataset_must_contain_label_category (line 53) | def test_loaded_dataset_must_contain_label_category(): function test_loaded_dataset_must_contain_label_is_attack (line 59) | def test_loaded_dataset_must_contain_label_is_attack(): function test_loaded_dataset_must_replace_invalid_value_with_nan (line 70) | def test_loaded_dataset_must_replace_invalid_value_with_nan(val_data): function test_loaded_dataset_must_contain_only_specified_columns (line 79) | def test_loaded_dataset_must_contain_only_specified_columns(): function test_loaded_dataset_must_omit_specified_columns (line 85) | def test_loaded_dataset_must_omit_specified_columns(): FILE: tests/transform/test_preprocessing.py function feature_df (line 13) | def feature_df(): function nan_value_count (line 19) | def nan_value_count(x): function test_pipeline_must_impute_all_missing_values (line 23) | def test_pipeline_must_impute_all_missing_values(feature_df): function test_pipeline_must_impute_selected_columns_only (line 34) | def test_pipeline_must_impute_selected_columns_only(feature_df): function test_pipeline_must_not_impute_values_if_imputer_strategy_none (line 49) | def test_pipeline_must_not_impute_values_if_imputer_strategy_none(featur... function test_pipeline_must_reorder_columns (line 61) | def test_pipeline_must_reorder_columns(feature_df): function test_pipeline_must_impute_all_missing_values_with_mean (line 75) | def test_pipeline_must_impute_all_missing_values_with_mean(feature_df): function test_pipeline_must_impute_all_missing_values_with_median (line 89) | def test_pipeline_must_impute_all_missing_values_with_median(feature_df): function test_pipeline_must_scale_all_values (line 103) | def test_pipeline_must_scale_all_values(feature_df): function test_pipeline_must_one_hot_encode_categorical_values (line 111) | def test_pipeline_must_one_hot_encode_categorical_values(feature_df):