SYMBOL INDEX (50 symbols across 10 files) FILE: 02_analytical_data_prep/src/DataPreperation.py class DataPreperation (line 1) | class DataPreperation(object): method __init__ (line 2) | def __init__(self): method label_encoder (line 6) | def label_encoder(dataframe,columns=[],frame_type='spark'): method imputer (line 73) | def imputer(dataframe,columns=[], type='median',frame_type='spark'): method polynomial_expansion (line 145) | def polynomial_expansion(dataframe,columns=[], degree=3,frame_type='sp... method get_top_correlations (line 249) | def get_top_correlations(dataframe,columns,frame_type='spark'): method feature_combiner (line 287) | def feature_combiner(training_frame, valid_frame = None, test_frame=No... method shrunken_averages_encoder (line 432) | def shrunken_averages_encoder(training_frame, valid_frame = None,test_... method convert_boolean_to_int (line 778) | def convert_boolean_to_int(frame, rejects=[],frame_type='spark'): method get_type_lists (line 801) | def get_type_lists(frame, rejects=['Id', 'ID','id'],frame_type='spark'): method remove_outliers_by_percentile (line 839) | def remove_outliers_by_percentile(dataframe, columns, limits =.01, fra... method winsorize_columns (line 904) | def winsorize_columns(dataframe, columns, winzerize_type='percentile',... method remove_outliers_by_std (line 1004) | def remove_outliers_by_std(dataframe, columns, standard_deviation_limi... method create_spark_estimator_vector (line 1064) | def create_spark_estimator_vector(df, ignore = [], out_put_column='fea... method dimensionality_reduction (line 1081) | def dimensionality_reduction(train_frame,valid_frame=None,test_frame=N... method pca (line 1492) | def pca(frame,columns=[],k=320,frame_type='spark'): FILE: 03_regression/src/spark_kaggle_starter/feature_combiner.py function feature_combiner (line 7) | def feature_combiner(training_frame, test_frame, nums, valid_frame = Non... FILE: 03_regression/src/spark_kaggle_starter/get_type_lists.py function get_type_lists (line 1) | def get_type_lists(frame, rejects=['Id', 'ID','id'],frame_type='h2o'): FILE: 03_regression/src/spark_kaggle_starter/logging_lib/LoggingController.py class LoggingController (line 8) | class LoggingController(object): method __init__ (line 13) | def __init__(self, profile_name = 'default', s3_bucket = 'emr-related-... method get_datetime_str (line 20) | def get_datetime_str(self): method get_path_for_new_log (line 25) | def get_path_for_new_log(self): method log_matplotlib_plot (line 30) | def log_matplotlib_plot(self,plot, format = 'png'): method log_string (line 53) | def log_string(self,string): FILE: 03_regression/src/spark_kaggle_starter/logging_lib/MarkdownBuilder.py class MarkdownBuilder (line 10) | class MarkdownBuilder(object): method __init__ (line 15) | def __init__(self, profile_name = 'default', s3_bucket = 'emr-related-... method get_datetime_str (line 22) | def get_datetime_str(self): method log_string (line 28) | def log_string(self,string): method build_markdowns (line 34) | def build_markdowns(self): FILE: 03_regression/src/spark_kaggle_starter/spark_controler/emr_controller.py class EMRController (line 22) | class EMRController(object): method __init__ (line 23) | def __init__(self, profile_name = 'default', aws_access_key = False, a... method boto_client (line 49) | def boto_client(self, service): method load_cluster (line 64) | def load_cluster(self, _spark_properties=False): method add_create_step (line 325) | def add_create_step(self, job_flow_id, master_dns): method add_spark_submit_step (line 361) | def add_spark_submit_step(self, job_flow_id,name_of_script_directory): method create_bucket_on_s3 (line 416) | def create_bucket_on_s3(self, bucket_name): method upload_to_s3 (line 431) | def upload_to_s3(self, path_to_file, bucket_name, path_on_s3): method get_maximum_resource_allocation_properties (line 452) | def get_maximum_resource_allocation_properties(self,_master_memory,_ma... method get_datetime_str (line 505) | def get_datetime_str(self): method generate_job_name (line 511) | def generate_job_name(self): method tar_python_script (line 520) | def tar_python_script(self): method remove_temp_files (line 536) | def remove_temp_files(self, s3): method run (line 549) | def run(self,execute_type='create'): method step_copy_data_between_s3_and_hdfs (line 642) | def step_copy_data_between_s3_and_hdfs(self, c, src, dest): FILE: 03_regression/src/spark_kaggle_starter/spark_main.py function glm_grid (line 170) | def glm_grid(X, y, train, valid, should_submit = False): function neural_net_grid (line 225) | def neural_net_grid(X, y, train, valid): function gboosting_grid (line 258) | def gboosting_grid(X, y, train, valid): FILE: 03_regression/src/spark_kaggle_starter/target_encoder.py function target_encoder (line 1) | def target_encoder(training_frame, test_frame, x, y, lambda_=0.15, thres... FILE: 03_regression/src/target_encoder.py function target_encoder (line 5) | def target_encoder(training_frame, test_frame, x, y, lambda_=0.15, thres... FILE: cold_call.py function main (line 9) | def main(argv):