SYMBOL INDEX (105 symbols across 27 files)

FILE: ml_ids/data/dataset.py
  function remove_inf_values (line 12) | def remove_inf_values(df: pd.DataFrame) -> pd.DataFrame:
  function remove_negative_values (line 25) | def remove_negative_values(df: pd.DataFrame, ignore_cols: List[str] = No...
  function add_label_category_column (line 45) | def add_label_category_column(df: pd.DataFrame) -> pd.DataFrame:
  function add_label_is_attack_columns (line 56) | def add_label_is_attack_columns(df: pd.DataFrame) -> pd.DataFrame:
  function load_dataset_generic (line 68) | def load_dataset_generic(load_df_fn,
  function load_dataset (line 107) | def load_dataset(dataset_path: str,
  function load_dataset_hdf (line 139) | def load_dataset_hdf(dataset_path: str,

FILE: ml_ids/data/split_dataset.py
  function split_dataset (line 34) | def split_dataset(dataset_path, output_path, val_size, test_size, nrows,...
  function remove_extra_labels (line 58) | def remove_extra_labels(dataset: pd.DataFrame):
  function save_dataset (line 67) | def save_dataset(dataset: pd.DataFrame, path: str, ds_type: str):

FILE: ml_ids/keras/callbacks.py
  class OneCycleScheduler (line 11) | class OneCycleScheduler(callbacks.Callback):
    method __init__ (line 16) | def __init__(self, iterations, max_rate, start_rate=None,
    method _interpolate (line 26) | def _interpolate(self, iter1, iter2, rate1, rate2):
    method on_batch_begin (line 30) | def on_batch_begin(self, batch, logs):

FILE: ml_ids/keras/evaluation.py
  function evaluate_model (line 7) | def evaluate_model(model, X_train, y_train, X_val, y_val, metric_title):

FILE: ml_ids/keras/metrics.py
  class AveragePrecisionScoreMetric (line 14) | class AveragePrecisionScoreMetric(callbacks.Callback):
    method __init__ (line 19) | def __init__(self, X_val, y_val, batch_size=4096):
    method get_precision_score (line 25) | def get_precision_score(self):
    method on_epoch_end (line 36) | def on_epoch_end(self, epoch, logs):

FILE: ml_ids/keras/model_selection.py
  function cross_val_train (line 11) | def cross_val_train(fit_fn,

FILE: ml_ids/keras/prediction.py
  function predict (line 7) | def predict(model, X, decision_boundary=0.5):
  function predict_proba (line 22) | def predict_proba(model, X):

FILE: ml_ids/libs/dfencoder/dataframe.py
  class EncoderDataFrame (line 36) | class EncoderDataFrame(pd.DataFrame):
    method __init__ (line 37) | def __init__(self, *args, **kwargs):
    method swap (line 40) | def swap(self, likelihood=.15):

FILE: ml_ids/model_selection.py
  function train_val_test_split (line 11) | def train_val_test_split(df: pd.DataFrame,
  function split_x_y (line 46) | def split_x_y(df: pd.DataFrame, y_cols: List[str] = None) -> Tuple[pd.Da...
  function best_precision_for_target_recall (line 60) | def best_precision_for_target_recall(y_true, y_pred_score, target_recall):

FILE: ml_ids/models/gradient_boost/mlflow_wrapper.py
  class CatBoostWrapper (line 10) | class CatBoostWrapper(mlflow.pyfunc.PythonModel):
    method load_context (line 15) | def load_context(self, context):
    method preprocess (line 28) | def preprocess(self, data):
    method predict (line 40) | def predict(self, context, model_input):

FILE: ml_ids/models/gradient_boost/train.py
  function fit_pipeline (line 21) | def fit_pipeline(train_dataset):
  function preprocess_val_dataset (line 41) | def preprocess_val_dataset(pipeline, val_dataset):
  function preprocess_train_dataset (line 55) | def preprocess_train_dataset(pipeline, train_dataset, nr_attack_samples,...
  function calculate_class_weights (line 76) | def calculate_class_weights(y_train):
  function train_gb_classifier (line 87) | def train_gb_classifier(train_pool,
  function train_model (line 127) | def train_model(train_dataset: pd.DataFrame,

FILE: ml_ids/prediction.py
  function predict_proba_positive (line 6) | def predict_proba_positive(clf, X):
  function predict_decision_boundary (line 18) | def predict_decision_boundary(clf, X, decision_boundary=0.5):

FILE: ml_ids/tf_utils.py
  function enable_gpu_memory_growth (line 7) | def enable_gpu_memory_growth():

FILE: ml_ids/transform/preprocessing.py
  function remove_outliers (line 15) | def remove_outliers(df: pd.DataFrame, zscore: int = 3) -> pd.DataFrame:
  function create_pipeline (line 27) | def create_pipeline(df: pd.DataFrame,

FILE: ml_ids/transform/sampling.py
  function upsample_minority_classes (line 10) | def upsample_minority_classes(X: np.ndarray,
  function create_sample_dict (line 45) | def create_sample_dict(df: pd.DataFrame,
  function downsample (line 69) | def downsample(df: pd.DataFrame,

FILE: ml_ids/visualization.py
  function plot_hist (line 14) | def plot_hist(hist,
  function plot_confusion_matrix (line 49) | def plot_confusion_matrix(y_true,
  function identity (line 120) | def identity(x):
  function plot_threshold (line 127) | def plot_threshold(pred_train, pred_val, threshold, size=(15, 5), transf...
  function get_misclassifications (line 147) | def get_misclassifications(y, y_true, pred):
  function print_binary_performance (line 165) | def print_binary_performance(y, y_true, pred, print_misclassifications=T...
  function plot_pr_curve (line 195) | def plot_pr_curve(y_true, y_score, size=(8, 5), average='weighted'):
  function plot_pr_curves (line 219) | def plot_pr_curves(y_true, y_score_dict, size=(8, 5), average='weighted'):
  function plot_pr_threshold_curves (line 244) | def plot_pr_threshold_curves(y_true, y_pred_score, size=(20, 8)):

FILE: models/gradient_boost/envs/local/train.py
  function merge (line 8) | def merge(dict1, dict2):
  function train (line 31) | def train(train_path, val_path, test_path, output_path, param_path):

FILE: models/gradient_boost/envs/sagemaker/container/train.py
  function merge (line 25) | def merge(dict1, dict2):

FILE: models/gradient_boost/envs/sagemaker/scripts/deploy.py
  function unpack (line 13) | def unpack(file):
  function deploy (line 34) | def deploy(config_path, job_id):

FILE: models/gradient_boost/envs/sagemaker/scripts/train.py
  function create_performance_metric_regex (line 10) | def create_performance_metric_regex(id):
  function create_metric_def (line 20) | def create_metric_def(name, regex):
  function get_metric_definitions (line 30) | def get_metric_definitions():
  function train (line 55) | def train(config_path, param_path, image_name, mode, job_id):

FILE: models/gradient_boost/envs/sagemaker/scripts/undeploy.py
  function undeploy (line 9) | def undeploy(config_path):

FILE: models/gradient_boost/project/train.py
  function load_dataset (line 22) | def load_dataset(path):
  function load_train_val_test_dataset (line 33) | def load_train_val_test_dataset(train_path, val_path, test_path):
  function measure_performance (line 44) | def measure_performance(clf, pipeline, dataset):
  function save_artifacts (line 68) | def save_artifacts(cbm_model_path, classifier, pipeline_path, pipeline, ...
  function train (line 109) | def train(train_path,

FILE: notebooks/05_anomaly_detection/notebook_utils.py
  function predict (line 10) | def predict(model, X, y):
  function evaluate_pr_roc (line 17) | def evaluate_pr_roc(pred):
  function plot_evaluation_curves (line 23) | def plot_evaluation_curves(pred):
  function plot_pr_threshold_curves (line 45) | def plot_pr_threshold_curves(pred, pr_plot_lim=[0, 1]):
  function best_precision_for_target_recall (line 68) | def best_precision_for_target_recall(pred, target_recall):
  function get_misclassifications (line 73) | def get_misclassifications(y, pred_binary):
  function print_performance (line 83) | def print_performance(y, pred, threshold):
  function filter_benign (line 101) | def filter_benign(X, y):

FILE: notebooks/06_dl_classifier/notebook_utils.py
  function transform_data (line 9) | def transform_data(dataset,

FILE: notebooks/07_binary_classifier_comparison/notebook_utils.py
  function get_best_model_path (line 9) | def get_best_model_path(trials, model_path_var='model_path'):
  function print_trial_results (line 13) | def print_trial_results(trials, best_run, model_path_var='model_path'):
  function transform_data (line 23) | def transform_data(dataset,

FILE: tests/data/test_dataset.py
  function val_data (line 10) | def val_data():
  function inf_value_count (line 15) | def inf_value_count(df):
  function neg_value_count (line 19) | def neg_value_count(df):
  function nan_value_count (line 25) | def nan_value_count(df):
  function negative_value_columns (line 29) | def negative_value_columns(df):
  function test_loaded_dataset_must_not_contain_inf_values (line 34) | def test_loaded_dataset_must_not_contain_inf_values():
  function test_loaded_dataset_must_not_contain_negative_values (line 40) | def test_loaded_dataset_must_not_contain_negative_values():
  function test_loaded_dataset_must_not_contain_negative_values_except_excluded_cols (line 46) | def test_loaded_dataset_must_not_contain_negative_values_except_excluded...
  function test_loaded_dataset_must_contain_label_category (line 53) | def test_loaded_dataset_must_contain_label_category():
  function test_loaded_dataset_must_contain_label_is_attack (line 59) | def test_loaded_dataset_must_contain_label_is_attack():
  function test_loaded_dataset_must_replace_invalid_value_with_nan (line 70) | def test_loaded_dataset_must_replace_invalid_value_with_nan(val_data):
  function test_loaded_dataset_must_contain_only_specified_columns (line 79) | def test_loaded_dataset_must_contain_only_specified_columns():
  function test_loaded_dataset_must_omit_specified_columns (line 85) | def test_loaded_dataset_must_omit_specified_columns():

FILE: tests/transform/test_preprocessing.py
  function feature_df (line 13) | def feature_df():
  function nan_value_count (line 19) | def nan_value_count(x):
  function test_pipeline_must_impute_all_missing_values (line 23) | def test_pipeline_must_impute_all_missing_values(feature_df):
  function test_pipeline_must_impute_selected_columns_only (line 34) | def test_pipeline_must_impute_selected_columns_only(feature_df):
  function test_pipeline_must_not_impute_values_if_imputer_strategy_none (line 49) | def test_pipeline_must_not_impute_values_if_imputer_strategy_none(featur...
  function test_pipeline_must_reorder_columns (line 61) | def test_pipeline_must_reorder_columns(feature_df):
  function test_pipeline_must_impute_all_missing_values_with_mean (line 75) | def test_pipeline_must_impute_all_missing_values_with_mean(feature_df):
  function test_pipeline_must_impute_all_missing_values_with_median (line 89) | def test_pipeline_must_impute_all_missing_values_with_median(feature_df):
  function test_pipeline_must_scale_all_values (line 103) | def test_pipeline_must_scale_all_values(feature_df):
  function test_pipeline_must_one_hot_encode_categorical_values (line 111) | def test_pipeline_must_one_hot_encode_categorical_values(feature_df):