Repository: dvgodoy/handyspark Branch: master Commit: 0fb4c8707b34 Files: 49 Total size: 467.3 KB Directory structure: gitextract_4phs78pk/ ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── README.rst ├── docs/ │ ├── Makefile │ └── source/ │ ├── conf.py │ ├── handyspark.extensions.rst │ ├── handyspark.ml.rst │ ├── handyspark.rst │ ├── handyspark.sql.rst │ ├── includeme.rst │ ├── index.rst │ └── modules.rst ├── handyspark/ │ ├── __init__.py │ ├── extensions/ │ │ ├── __init__.py │ │ ├── common.py │ │ ├── evaluation.py │ │ └── types.py │ ├── ml/ │ │ ├── __init__.py │ │ └── base.py │ ├── plot.py │ ├── sql/ │ │ ├── __init__.py │ │ ├── dataframe.py │ │ ├── datetime.py │ │ ├── pandas.py │ │ ├── schema.py │ │ ├── string.py │ │ └── transform.py │ ├── stats.py │ └── util.py ├── notebooks/ │ └── Exploring_Titanic.ipynb ├── requirements.txt ├── setup.cfg ├── setup.py └── tests/ ├── handyspark/ │ ├── conftest.py │ ├── extensions/ │ │ ├── test_evaluation.py │ │ └── test_types.py │ ├── ml/ │ │ └── test_base.py │ ├── sql/ │ │ ├── test_dataframe.py │ │ ├── test_datetime.py │ │ ├── test_pandas.py │ │ ├── test_schema.py │ │ ├── test_string.py │ │ └── test_transform.py │ ├── test_plot.py │ ├── test_stats.py │ └── test_util.py └── rawdata/ └── train.csv ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # dotenv .env # virtualenv .venv venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .idea examples/spark-warehouse/ tests/spark-warehouse ================================================ FILE: .travis.yml ================================================ language: python sudo: required dist: trusty cache: directories: - $HOME/.ivy2 - $HOME/spark - $HOME/.cache/pip - $HOME/.pip-cache - $HOME/.sbt/launchers jdk: - oraclejdk8 python: - 3.6 sudo: false addons: apt: packages: - axel cache: pip before_install: - export PATH=$HOME/.local/bin:$PATH - pip install -U pip - export PYTHONPATH=$PYTHONPATH:$(pwd) install: # Download spark 2.3.3 - "[ -f spark ] || mkdir spark && cd spark && axel http://www-us.apache.org/dist/spark/spark-2.3.3/spark-2.3.3-bin-hadoop2.7.tgz && cd .." 
- "tar -xf ./spark/spark-2.3.3-bin-hadoop2.7.tgz" - "export SPARK_HOME=`pwd`/spark-2.3.3-bin-hadoop2.7" - "export PYTHONPATH=$PYTHONPATH:$SPARK_HOME/python" - echo "spark.yarn.jars=$SPARK_HOME/jars/*.jar" > $SPARK_HOME/conf/spark-defaults.conf - pip install -r requirements.txt script: - pytest ./tests ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 Daniel Voigt Godoy Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ [![Build Status](https://travis-ci.org/dvgodoy/handyspark.svg?branch=master)](https://travis-ci.org/dvgodoy/handyspark) # HandySpark ## Bringing pandas-like capabilities to Spark dataframes! ***HandySpark*** is a package designed to improve ***PySpark*** user experience, especially when it comes to ***exploratory data analysis***, including ***visualization*** capabilities! 
It makes fetching data or computing statistics for columns really easy, returning ***pandas objects*** straight away. It also leverages the recently released ***pandas UDFs*** in Spark to allow for an out-of-the-box usage of common ***pandas functions*** in a Spark dataframe. Moreover, it introduces the ***stratify*** operation, so users can perform more sophisticated analysis, imputation and outlier detection on stratified data without incurring very computationally expensive ***groupby*** operations. It brings the long-missing capability of ***plotting*** data while retaining the advantage of performing distributed computation (unlike many tutorials on the internet, which just convert the whole dataset to pandas and then plot it - don't ever do that!). Finally, it also extends ***evaluation metrics*** for ***binary classification***, so you can easily choose which threshold to use! ## Google Colab Eager to try it out right away? Don't wait any longer! Open the notebook directly on Google Colab and try it yourself: - [Exploring Titanic](https://colab.research.google.com/github/dvgodoy/handyspark/blob/master/notebooks/Exploring_Titanic.ipynb) ## Installation To install ***HandySpark*** from [PyPI](https://pypi.org/project/handyspark/), just type: ```python pip install handyspark ``` ## Documentation You can find the full documentation [here](http://dvgodoy.github.com/handyspark). 
Here is a ***handy*** list of direct links to some classes, objects and methods used: - [HandyFrame](https://dvgodoy.github.io/handyspark/handyspark.sql.html#handyspark.sql.dataframe.HandyFrame) - [cols](https://dvgodoy.github.io/handyspark/handyspark.sql.html#handyspark.sql.dataframe.HandyColumns) - [pandas](https://dvgodoy.github.io/handyspark/handyspark.sql.html#handyspark.sql.pandas.HandyPandas) - [transformers](https://dvgodoy.github.io/handyspark/handyspark.ml.html#handyspark.ml.base.HandyTransformers) - [isnull](https://dvgodoy.github.io/handyspark/handyspark.html#handyspark.HandyFrame.isnull) - [fill](https://dvgodoy.github.io/handyspark/handyspark.html#handyspark.HandyFrame.fill) - [outliers](https://dvgodoy.github.io/handyspark/handyspark.html#handyspark.HandyFrame.outliers) - [fence](https://dvgodoy.github.io/handyspark/handyspark.html#handyspark.HandyFrame.fence) - [stratify](https://dvgodoy.github.io/handyspark/handyspark.sql.html#handyspark.sql.dataframe.HandyFrame.stratify) - [Bucket](https://dvgodoy.github.io/handyspark/handyspark.sql.html#handyspark.sql.dataframe.Bucket) - [Quantile](https://dvgodoy.github.io/handyspark/handyspark.sql.html#handyspark.sql.dataframe.Quantile) - [HandyImputer](https://dvgodoy.github.io/handyspark/handyspark.ml.html#handyspark.ml.base.HandyImputer) - [HandyFencer](https://dvgodoy.github.io/handyspark/handyspark.ml.html#handyspark.ml.base.HandyFencer) ## Quick Start To use ***HandySpark***, all you need to do is import the package and, after loading your data into a Spark dataframe, call the ***toHandy()*** method to get your own ***HandyFrame***: ```python from pyspark.sql import SparkSession spark = SparkSession.builder.getOrCreate() from handyspark import * sdf = spark.read.csv('./tests/rawdata/train.csv', header=True, inferSchema=True) hdf = sdf.toHandy() ``` ### Fetching and plotting data Now you can easily fetch data as if you were using pandas, just use the ***cols*** object from your ***HandyFrame***: ```python 
hdf.cols['Name'][:5] ``` It should return a pandas Series object: ``` 0 Braund, Mr. Owen Harris 1 Cumings, Mrs. John Bradley (Florence Briggs Th... 2 Heikkinen, Miss. Laina 3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 Allen, Mr. William Henry Name: Name, dtype: object ``` If you include a list of columns, it will return a pandas DataFrame. Due to the distributed nature of data in Spark, it is only possible to fetch the top rows of any given ***HandyFrame***. Using ***cols*** you have access to several pandas-like column and DataFrame based methods implemented in Spark: - min / max / median / q1 / q3 / stddev / mode - nunique - value_counts - corr - hist - boxplot - scatterplot For instance: ```python hdf.cols['Embarked'].value_counts(dropna=False) ``` ``` S 644 C 168 Q 77 NaN 2 Name: Embarked, dtype: int64 ``` You can also make some plots: ```python from matplotlib import pyplot as plt fig, axs = plt.subplots(1, 4, figsize=(12, 4)) hdf.cols['Embarked'].hist(ax=axs[0]) hdf.cols['Age'].boxplot(ax=axs[1]) hdf.cols['Fare'].boxplot(ax=axs[2]) hdf.cols[['Fare', 'Age']].scatterplot(ax=axs[3]) ``` ![cols plots](/images/cols_plot.png) Handy, right (pun intended!)? But things can get ***even more*** interesting if you use ***stratify***! ### Stratify Stratifying a HandyFrame means using a ***split-apply-combine*** approach. It will first split your HandyFrame according to the specified (discrete) columns, then it will apply some function to each stratum of data and finally combine the results back together. This is better illustrated with an example - let's try the stratified version of our previous `value_counts`: ```python hdf.stratify(['Pclass']).cols['Embarked'].value_counts() ``` ``` Pclass Embarked 1 C 85 Q 2 S 127 2 C 17 Q 3 S 164 3 C 66 Q 72 S 353 Name: value_counts, dtype: int64 ``` Cool, isn't it? Besides, under the hood, not a single ***group by*** operation was performed - everything is handled using filter clauses! So, ***no data shuffling***! 
What if you want to ***stratify*** on a column containing continuous values? No problem! ```python hdf.stratify(['Sex', Bucket('Age', 2)]).cols['Embarked'].value_counts() ``` ``` Sex Age Embarked female Age >= 0.4200 and Age < 40.2100 C 46 Q 12 S 154 Age >= 40.2100 and Age <= 80.0000 C 15 S 32 male Age >= 0.4200 and Age < 40.2100 C 53 Q 11 S 287 Age >= 40.2100 and Age <= 80.0000 C 16 Q 5 S 81 Name: value_counts, dtype: int64 ``` You can use either ***Bucket*** or ***Quantile*** to discretize your data in any given number of bins! What about ***plotting*** it? Yes, ***HandySpark*** can handle that as well! ```python hdf.stratify(['Sex', Bucket('Age', 2)]).cols['Embarked'].hist(figsize=(8, 6)) ``` ![stratified hist](/images/stratified_hist.png) ### Handling missing data ***HandySpark*** makes it very easy to spot and fill missing values. To figure if there are any missing values, just use ***isnull***: ```python hdf.isnull(ratio=True) ``` ``` PassengerId 0.000000 Survived 0.000000 Pclass 0.000000 Name 0.000000 Sex 0.000000 Age 0.198653 SibSp 0.000000 Parch 0.000000 Ticket 0.000000 Fare 0.000000 Cabin 0.771044 Embarked 0.002245 Name: missing(ratio), dtype: float64 ``` Ok, now you know there are 3 columns with missing values: `Age`, `Cabin` and `Embarked`. It's time to fill those values up! But, let's skip `Cabin`, which has 77% of its values missing! So, `Age` is a continuous variable, while `Embarked` is a categorical variable. Let's start with the latter: ```python hdf_filled = hdf.fill(categorical=['Embarked']) ``` ***HandyFrame*** has a ***fill*** method which takes up to 3 arguments: - categorical: a list of categorical variables - continuous: a list of continuous variables - strategy: which strategy to use for each one of the continuous variables (either `mean` or `median`) Categorical variables use a `mode` strategy by default. But you do not need to stick with the basics anymore... 
you can fancy it up using ***stratify*** together with ***fill***: ```python hdf_filled = hdf_filled.stratify(['Pclass', 'Sex']).fill(continuous=['Age'], strategy=['mean']) ``` How do you know which values are being used? Simple enough: ```python hdf_filled.statistics_ ``` ``` {'Age': {'Pclass == "1" and Sex == "female"': 34.61176470588235, 'Pclass == "1" and Sex == "male"': 41.28138613861386, 'Pclass == "2" and Sex == "female"': 28.722972972972972, 'Pclass == "2" and Sex == "male"': 30.74070707070707, 'Pclass == "3" and Sex == "female"': 21.75, 'Pclass == "3" and Sex == "male"': 26.507588932806325}, 'Embarked': 'S'} ``` There you go! The filter clauses and the corresponding imputation values! But there is ***more*** - once you're done with your imputation procedure, why not generate a ***custom transformer*** to do that for you, either on your test set or in production? You only need to call the ***imputer*** method of the ***transformers*** object that every ***HandyFrame*** has: ```python imputer = hdf_filled.transformers.imputer() ``` In the example above, ***imputer*** is now a full-fledged serializable PySpark transformer! What does that mean? You can use it in your ***pipeline*** and ***save / load*** at will :-) ### Detecting outliers Second only to the problem of missing data, outliers can pose a challenge for training machine learning models. ***HandyFrame*** to the rescue, with its ***outliers*** method: ```python hdf_filled.outliers(method='tukey', k=3.) ``` ``` PassengerId 0.0 Survived 0.0 Pclass 0.0 Age 1.0 SibSp 12.0 Parch 213.0 Fare 53.0 dtype: float64 ``` Currently, only [***Tukey's***](https://en.wikipedia.org/wiki/Outlier#Tukey's_fences) method is available. This method takes an optional ***k*** argument, which you can set to larger values (like 3) to allow for a looser detection. 
The good thing is, now we can take a peek at the data by plotting it: ```python from matplotlib import pyplot as plt fig, axs = plt.subplots(1, 4, figsize=(16, 4)) hdf_filled.cols['Parch'].hist(ax=axs[0]) hdf_filled.cols['SibSp'].hist(ax=axs[1]) hdf_filled.cols['Age'].boxplot(ax=axs[2], k=3) hdf_filled.cols['Fare'].boxplot(ax=axs[3], k=3) ``` ![outliers](/images/outliers.png) Let's focus on the `Fare` column - what can we do about it? Well, we could use Tukey's fences to, er... ***fence*** the outliers :-) ```python hdf_fenced = hdf_filled.fence(['Fare']) ``` Which values were used, you ask? ```python hdf_fenced.fences_ ``` ``` {'Fare': [-26.0105, 64.4063]} ``` It works quite similarly to the ***fill*** method and, I hope you guessed, it ***also*** gives you the ability to create the corresponding ***custom transformer*** :-) ```python fencer = hdf_fenced.transformers.fencer() ``` You can also use [***Mahalanobis distance***](https://en.wikipedia.org/wiki/Mahalanobis_distance) to identify outliers in a multi-dimensional space, given a critical value (usually 99.9%, but you are free to use either a more restricted or a more relaxed threshold). To get the outliers for a subset of columns (only ***numerical*** columns are considered!): ``` outliers = hdf_filled.cols[['Age', 'Fare', 'SibSp']].get_outliers(critical_value=.90) ``` Let's take a look at the first 5 outliers found: ``` outliers.cols[:][:5] ``` ![outliers](/images/mahalanobis_outliers.png) What if you want to discard these samples? You just need to call `remove_outliers`: ``` hdf_without_outliers = hdf_filled.cols[['Age', 'Fare', 'SibSp']].remove_outliers(critical_value=0.90) ``` ### Evaluating your model! You cleaned your data, you trained your classification model, you fine-tuned it and now you want to ***evaluate*** it, right? 
``` from pyspark.ml.feature import VectorAssembler from pyspark.ml.classification import RandomForestClassifier from pyspark.ml.pipeline import Pipeline from pyspark.ml.evaluation import BinaryClassificationEvaluator assem = VectorAssembler(inputCols=['Fare', 'Pclass', 'Age'], outputCol='features') rf = RandomForestClassifier(featuresCol='features', labelCol='Survived', numTrees=20) pipeline = Pipeline(stages=[assem, rf]) model = pipeline.fit(hdf_fenced) predictions = model.transform(hdf_fenced) evaluator = BinaryClassificationEvaluator(labelCol='Survived') evaluator.evaluate(predictions) ``` Then you realize evaluators only give you `areaUnderROC` and `areaUnderPR`. How about ***plotting ROC or PR curves***? How about ***finding a threshold*** that suits your needs for False Positive or False negatives? ***HandySpark*** extends the ***BinaryClassificationMetrics*** object to take ***DataFrames*** and output ***all your evaluation needs***! ``` bcm = BinaryClassificationMetrics(predictions, scoreCol='probability', labelCol='Survived') ``` Now you can ***plot*** the curves... ``` from matplotlib import pyplot as plt fig, axs = plt.subplots(1, 2, figsize=(12, 4)) bcm.plot_roc_curve(ax=axs[0]) bcm.plot_pr_curve(ax=axs[1]) ``` ![curves](/images/evaluation_curves.png) ...or get metrics for every ***threshold***... ``` bcm.getMetricsByThreshold().toPandas()[100:105] ``` ![metrics](/images/metrics_thresholds.png) ...or the ***confusion matrix*** for the threshold you chose: ``` bcm.print_confusion_matrix(.572006) ``` ![cm](/images/confusion.png) ### Pandas and more pandas! 
With ***HandySpark*** you can feel ***almost*** as if you were using traditional pandas :-) To gain access to the whole suite of available pandas functions, you need to leverage the ***pandas*** object of your ***HandyFrame***: ```python some_ports = hdf_fenced.pandas['Embarked'].isin(values=['C', 'Q']) some_ports ``` ``` Column(Embarked,)`'> ``` In the example above, ***HandySpark*** treats the `Embarked` column as if it were a pandas Series and, therefore, you may call its ***isin*** method! But, remember Spark has ***lazy evaluation***, so the result is a ***column expression*** which leverages the power of ***pandas UDFs*** (provided that PyArrow is installed, otherwise it will fall back to traditional UDFs). The only thing left to do is to actually ***assign*** the results to a new column, right? ```python hdf_fenced = hdf_fenced.assign(is_c_or_q=some_ports) # What's in there? hdf_fenced.cols['is_c_or_q'][:5] ``` ``` 0 True 1 False 2 False 3 True 4 True Name: is_c_or_q, dtype: bool ``` You got that right! ***HandyFrame*** has a very convenient ***assign*** method, just like in pandas! It does not get much easier than that :-) There are several column methods available already: - between / between_time - isin - isna / isnull - notna / notnull - abs - clip / clip_lower / clip_upper - replace - round / truncate - tz_convert / tz_localize And this is not all! Both specialized ***str*** and ***dt*** objects from pandas are available as well! For instance, what if you want to find out whether a given string contains another substring? ```python col_mrs = hdf_fenced.pandas['Name'].str.find(sub='Mrs.') hdf_fenced = hdf_fenced.assign(is_mrs=col_mrs > 0) ``` ![is mrs](/images/is_mrs.png) There are many, many more available methods: 1. 
***String methods***: - contains - startswith / endswith - match - isalpha / isnumeric / isalnum / isdigit / isdecimal / isspace - islower / isupper / istitle - replace - repeat - join - pad - slice / slice_replace - strip / lstrip / rstrip - wrap / center / ljust / rjust - translate - get - normalize - lower / upper / capitalize / swapcase / title - zfill - count - find / rfind - len 2. ***Date / Datetime methods***: - is_leap_year / is_month_end / is_month_start / is_quarter_end / is_quarter_start / is_year_end / is_year_start - strftime - tz / time / tz_convert / tz_localize - day / dayofweek / dayofyear / days_in_month / daysinmonth - hour / microsecond / minute / nanosecond / second - week / weekday / weekday_name - month / quarter / year / weekofyear - date - ceil / floor / round - normalize ### Your own functions The sky is the limit! You can create regular Python functions and use assign to create new columns :-) No need to worry about turning them into ***pandas UDFs*** - everything is handled by ***HandySpark*** under the hood! The arguments of your function (or `lambda`) should have the names of the columns you want to use. For instance, to take the `log` of `Fare`: ```python import numpy as np hdf_fenced = hdf_fenced.assign(logFare=lambda Fare: np.log(Fare + 1)) ``` ![logfare](/images/logfare.png) You can also use multiple columns: ```python hdf_fenced = hdf_fenced.assign(fare_times_age=lambda Fare, Age: Fare * Age) ``` Even though the result is kinda pointless, it will work :-) Keep in mind that the ***return type***, that is, the column type of the new column, will be the same as the first column used (`Fare`, in the example). What if you want to return something of a ***different*** type?! No worries! You only need to ***wrap*** your function with the desired return type. 
An example should make this more clear: ```python from pyspark.sql.types import StringType hdf_fenced = hdf_fenced.assign(str_fare=StringType.ret(lambda Fare: Fare.map('${:,.2f}'.format))) hdf_fenced.cols['str_fare'][:5] ``` ``` 0 $65.66 1 $53.10 2 $26.55 3 $65.66 4 $65.66 Name: str_fare, dtype: object ``` Basically, we imported the desired output type - ***StringType*** - and used its extended method ***ret*** to wrap our `lambda` function that formats our numeric `Fare` column into a string. It is also possible to create a more complex type, like an array of doubles: ```python from pyspark.sql.types import ArrayType, DoubleType def make_list(Fare): return Fare.apply(lambda v: [v, v*2]) hdf_fenced = hdf_fenced.assign(fare_list=ArrayType(DoubleType()).ret(make_list)) hdf_fenced.cols['fare_list'][:5] ``` ``` 0 [7.25, 14.5] 1 [71.2833, 142.5666] 2 [7.925, 15.85] 3 [53.1, 106.2] 4 [8.05, 16.1] Name: fare_list, dtype: object ``` OK, so, what happened here? 1. First, we imported the necessary types, ***ArrayType*** and ***DoubleType***, since we are building a function that returns a list of doubles. 2. We actually built the function - notice that we call ***apply*** straight from ***Fare***, which is treated as a pandas Series under the hood. 3. We ***wrap*** the function with the return type `ArrayType(DoubleType())` by invoking the extended method `ret`. 4. Finally, we assign it to a new column name, and that's it! ### Nicer exceptions Now, suppose you make a mistake while creating your function... if you have used Spark for a while, you already realized that, when an exception is raised, it will be ***loooong***, right? 
To help you with that, ***HandySpark*** analyzes the error message and parses it nicely for you at the very ***top*** of the error message, in ***bold red***: ![exception](/images/handy_exception.png) ### Safety first ***HandySpark*** wants to protect your cluster and network, so it implements a ***safety*** whenever you perform an operation that is going to retrieve ***ALL*** data from your ***HandyFrame***, like `collect` or `toPandas`. How does that work? Every time a ***HandyFrame*** has one of these methods called, it will output up to the ***safety limit***, which has a default of ***1,000 elements***. ![safety on](/images/safety_on.png) Do you want to set a different safety limit for your ***HandyFrame***? ![safety limit](/images/safety_limit.png) What if you want to retrieve everything nonetheless?! You can invoke the ***safety_off*** method prior to the actual method you want to call and you get a ***one-time*** unlimited result. ![safety off](/images/safety_off.png) ### Don't feel like Handy anymore? To get back your original Spark dataframe, you only need to call ***notHandy*** to make it not handy again: ```python hdf_fenced.notHandy() ``` ``` DataFrame[PassengerId: int, Survived: int, Pclass: int, Name: string, Sex: string, Age: double, SibSp: int, Parch: int, Ticket: string, Fare: double, Cabin: string, Embarked: string, logFare: double, is_c_or_q: boolean] ``` ## Comments, questions, suggestions, bugs ***DISCLAIMER***: this is a project ***under development***, so it is likely you'll run into bugs/problems. So, if you find any bugs/problems, please open an [issue](https://github.com/dvgodoy/handyspark/issues) or submit a [pull request](https://github.com/dvgodoy/handyspark/pulls). ================================================ FILE: README.rst ================================================ .. 
image:: https://travis-ci.org/dvgodoy/handyspark.svg?branch=master :target: https://travis-ci.org/dvgodoy/handyspark :alt: Build Status HandySpark ========== Bringing pandas-like capabilities to Spark dataframes! ------------------------------------------------------ *HandySpark* is a package designed to improve *PySpark* user experience, especially when it comes to *exploratory data analysis* , including *visualization* capabilities! It makes fetching data or computing statistics for columns really easy, returning *pandas objects* straight away. It also leverages the recently released *pandas UDFs* in Spark to allow for an out-of-the-box usage of common *pandas functions* in a Spark dataframe. Moreover, it introduces the *stratify* operation, so users can perform more sophisticated analysis, imputation and outlier detection on stratified data without incurring very computationally expensive *groupby* operations. Finally, it brings the long-missing capability of *plotting* data while retaining the advantage of performing distributed computation (unlike many tutorials on the internet, which just convert the whole dataset to pandas and then plot it - don't ever do that!). Google Colab ------------ Eager to try it out right away? Don't wait any longer! Open the notebook directly on Google Colab and try it yourself: * `Exploring Titanic <https://colab.research.google.com/github/dvgodoy/handyspark/blob/master/notebooks/Exploring_Titanic.ipynb>`_ Installation ------------ To install *HandySpark* from `PyPI <https://pypi.org/project/handyspark/>`_, just type: .. code-block:: python pip install handyspark Documentation ------------- You can find the full documentation `here <http://dvgodoy.github.com/handyspark>`_. Quick Start ----------- To use *HandySpark* , all you need to do is import the package and, after loading your data into a Spark dataframe, call the *toHandy()* method to get your own *HandyFrame* : .. 
code-block:: python from pyspark.sql import SparkSession spark = SparkSession.builder.getOrCreate() from handyspark import * sdf = spark.read.csv('./tests/rawdata/train.csv', header=True, inferSchema=True) hdf = sdf.toHandy() Fetching and plotting data ^^^^^^^^^^^^^^^^^^^^^^^^^^ Now you can easily fetch data as if you were using pandas, just use the *cols* object from your *HandyFrame* : .. code-block:: python hdf.cols['Name'][:5] It should return a pandas Series object: .. code-block:: 0 Braund, Mr. Owen Harris 1 Cumings, Mrs. John Bradley (Florence Briggs Th... 2 Heikkinen, Miss. Laina 3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 Allen, Mr. William Henry Name: Name, dtype: object If you include a list of columns, it will return a pandas DataFrame. Due to the distributed nature of data in Spark, it is only possible to fetch the top rows of any given *HandyFrame*. Using *cols* you have access to several pandas-like column and DataFrame based methods implemented in Spark: * min / max / median / q1 / q3 / stddev / mode * nunique * value_counts * corr * hist * boxplot * scatterplot For instance: .. code-block:: python hdf.cols['Embarked'].value_counts(dropna=False) .. code-block:: S 644 C 168 Q 77 NaN 2 Name: Embarked, dtype: int64 You can also make some plots: .. code-block:: python from matplotlib import pyplot as plt fig, axs = plt.subplots(1, 4, figsize=(12, 4)) hdf.cols['Embarked'].hist(ax=axs[0]) hdf.cols['Age'].boxplot(ax=axs[1]) hdf.cols['Fare'].boxplot(ax=axs[2]) hdf.cols[['Fare', 'Age']].scatterplot(ax=axs[3]) .. image:: /images/cols_plot.png :target: /images/cols_plot.png :alt: cols plots Handy, right (pun intended!)? But things can get *even more* interesting if you use *stratify* ! Stratify ^^^^^^^^ Stratifying a HandyFrame means using a *split-apply-combine* approach. 
It will first split your HandyFrame according to the specified (discrete) columns, then it will apply some function to each stratum of data and finally combine the results back together. This is better illustrated with an example - let's try the stratified version of our previous ``value_counts``\ : .. code-block:: python hdf.stratify(['Pclass']).cols['Embarked'].value_counts() .. code-block:: Pclass Embarked 1 C 85 Q 2 S 127 2 C 17 Q 3 S 164 3 C 66 Q 72 S 353 Name: value_counts, dtype: int64 Cool, isn't it? Besides, under the hood, not a single *group by* operation was performed - everything is handled using filter clauses! So, *no data shuffling* ! What if you want to *stratify* on a column containing continuous values? No problem! .. code-block:: python hdf.stratify(['Sex', Bucket('Age', 2)]).cols['Embarked'].value_counts() .. code-block:: Sex Age Embarked female Age >= 0.4200 and Age < 40.2100 C 46 Q 12 S 154 Age >= 40.2100 and Age <= 80.0000 C 15 S 32 male Age >= 0.4200 and Age < 40.2100 C 53 Q 11 S 287 Age >= 40.2100 and Age <= 80.0000 C 16 Q 5 S 81 Name: value_counts, dtype: int64 You can use either *Bucket* or *Quantile* to discretize your data in any given number of bins! What about *plotting* it? Yes, *HandySpark* can handle that as well! .. code-block:: python hdf.stratify(['Sex', Bucket('Age', 2)]).cols['Embarked'].hist(figsize=(8, 6)) .. image:: /images/stratified_hist.png :target: /images/stratified_hist.png :alt: stratified hist Handling missing data ^^^^^^^^^^^^^^^^^^^^^ *HandySpark* makes it very easy to spot and fill missing values. To figure if there are any missing values, just use *isnull* : .. code-block:: python hdf.isnull(ratio=True) .. 
code-block:: PassengerId 0.000000 Survived 0.000000 Pclass 0.000000 Name 0.000000 Sex 0.000000 Age 0.198653 SibSp 0.000000 Parch 0.000000 Ticket 0.000000 Fare 0.000000 Cabin 0.771044 Embarked 0.002245 Name: missing(ratio), dtype: float64 Ok, now you know there are 3 columns with missing values: ``Age``\ , ``Cabin`` and ``Embarked``. It's time to fill those values up! But, let's skip ``Cabin``\ , which has 77% of its values missing! So, ``Age`` is a continuous variable, while ``Embarked`` is a categorical variable. Let's start with the latter: .. code-block:: python hdf_filled = hdf.fill(categorical=['Embarked']) *HandyFrame* has a *fill* method which takes up to 3 arguments: * categorical: a list of categorical variables * continuous: a list of continuous variables * strategy: which strategy to use for each one of the continuous variables (either ``mean`` or ``median``\ ) Categorical variables use a ``mode`` strategy by default. But you do not need to stick with the basics anymore... you can fancy it up using *stratify* together with *fill* : .. code-block:: python hdf_filled = hdf_filled.stratify(['Pclass', 'Sex']).fill(continuous=['Age'], strategy=['mean']) How do you know which values are being used? Simple enough: .. code-block:: python hdf_filled.statistics_ .. code-block:: {'Embarked': 'S', 'Pclass == "1" and Sex == "female"': {'Age': 34.61176470588235}, 'Pclass == "1" and Sex == "male"': {'Age': 41.28138613861386}, 'Pclass == "2" and Sex == "female"': {'Age': 28.722972972972972}, 'Pclass == "2" and Sex == "male"': {'Age': 30.74070707070707}, 'Pclass == "3" and Sex == "female"': {'Age': 21.75}, 'Pclass == "3" and Sex == "male"': {'Age': 26.507588932806325}} There you go! The filter clauses and the corresponding imputation values! But there is *more* - once you're done with your imputation procedure, why not generate a *custom transformer* to do that for you, either on your test set or in production? 
You only need to call the *imputer* method of the *transformers* object that every *HandyFrame* has: .. code-block:: python imputer = hdf_filled.transformers.imputer() In the example above, *imputer* is now a full-fledged serializable PySpark transformer! What does that mean? You can use it in your *pipeline* and *save / load* at will :-) Detecting outliers ^^^^^^^^^^^^^^^^^^ Second only to the problem of missing data, outliers can pose a challenge for training machine learning models. *HandyFrame* to the rescue, with its *outliers* method: .. code-block:: python hdf_filled.outliers(method='tukey', k=3.) .. code-block:: PassengerId 0.0 Survived 0.0 Pclass 0.0 Age 1.0 SibSp 12.0 Parch 213.0 Fare 53.0 dtype: float64 Currently, only `Tukey's <https://en.wikipedia.org/wiki/Outlier#Tukey's_fences>`_ method is available (I am working on Mahalanobis distance!). This method takes an optional *k* argument, which you can set to larger values (like 3) to allow for a looser detection. The good thing is, now we can take a peek at the data by plotting it: .. code-block:: python from matplotlib import pyplot as plt fig, axs = plt.subplots(1, 4, figsize=(16, 4)) hdf_filled.cols['Parch'].hist(ax=axs[0]) hdf_filled.cols['SibSp'].hist(ax=axs[1]) hdf_filled.cols['Age'].boxplot(ax=axs[2], k=3) hdf_filled.cols['Fare'].boxplot(ax=axs[3], k=3) .. image:: /images/outliers.png :target: /images/outliers.png :alt: outliers Let's focus on the ``Fare`` column - what can we do about it? Well, we could use Tukey's fences to, er... *fence* the outliers :-) .. code-block:: python hdf_fenced = hdf_filled.fence(['Fare']) Which values were used, you ask? .. code-block:: python hdf_fenced.fences_ .. code-block:: {'Fare': [-26.7605, 65.6563]} It works quite similarly to the *fill* method and, I hope you guessed, it *also* gives you the ability to create the corresponding *custom transformer* :-) .. code-block:: python fencer = hdf_fenced.transformers.fencer() Pandas and more pandas!
^^^^^^^^^^^^^^^^^^^^^^^ With *HandySpark* you can feel *almost* as if you were using traditional pandas :-) To gain access to the whole suite of available pandas functions, you need to leverage the *pandas* object of your *HandyFrame* : .. code-block:: python some_ports = hdf_fenced.pandas['Embarked'].isin(values=['C', 'Q']) some_ports .. code-block:: Column<b'udf(Embarked) AS `<lambda>(Embarked,)`'> In the example above, *HandySpark* treats the ``Embarked`` column as if it were a pandas Series and, therefore, you may call its *isin* method! But, remember Spark has *lazy evaluation* , so the result is a *column expression* which leverages the power of *pandas UDFs* (provided that PyArrow is installed, otherwise it will fall back to traditional UDFs). The only thing left to do is to actually *assign* the results to a new column, right? .. code-block:: python hdf_fenced = hdf_fenced.assign(is_c_or_q=some_ports) # What's in there? hdf_fenced.cols['is_c_or_q'][:5] .. code-block:: 0 True 1 False 2 False 3 True 4 True Name: is_c_or_q, dtype: bool You got that right! *HandyFrame* has a very convenient *assign* method, just like in pandas! It does not get much easier than that :-) There are several column methods available already: * between / between_time * isin * isna / isnull * notna / notnull * abs * clip / clip_lower / clip_upper * replace * round / truncate * tz_convert / tz_localize And this is not all! Both specialized *str* and *dt* objects from pandas are available as well! For instance, what if you want to find out whether a given string contains another substring? .. code-block:: python col_mrs = hdf_fenced.pandas['Name'].str.find(sub='Mrs.') hdf_fenced = hdf_fenced.assign(is_mrs=col_mrs > 0) .. image:: /images/is_mrs.png :target: /images/is_mrs.png :alt: is mrs There are many, many more available methods: *String methods* : #. contains #. startswith / endswith #. match #. isalpha / isnumeric / isalnum / isdigit / isdecimal / isspace #. islower / isupper / istitle #. replace #. repeat #. join #.
pad #. slice / slice_replace #. strip / lstrip / rstrip #. wrap / center / ljust / rjust #. translate #. get #. normalize #. lower / upper / capitalize / swapcase / title #. zfill #. count #. find / rfind #. len *Date / Datetime methods* : #. is_leap_year / is_month_end / is_month_start / is_quarter_end / is_quarter_start / is_year_end / is_year_start #. strftime #. tz / time / tz_convert / tz_localize #. day / dayofweek / dayofyear / days_in_month / daysinmonth #. hour / microsecond / minute / nanosecond / second #. week / weekday / weekday_name #. month / quarter / year / weekofyear #. date #. ceil / floor / round #. normalize Your own functions ^^^^^^^^^^^^^^^^^^ The sky is the limit! You can create regular Python functions and use assign to create new columns :-) No need to worry about turning them into *pandas UDFs* - everything is handled by *HandySpark* under the hood! The arguments of your function (or ``lambda``\ ) should have the names of the columns you want to use. For instance, to take the ``log`` of ``Fare``\ : .. code-block:: python import numpy as np hdf_fenced = hdf_fenced.assign(logFare=lambda Fare: np.log(Fare + 1)) .. image:: /images/logfare.png :target: /images/logfare.png :alt: logfare You can also use multiple columns: .. code-block:: python hdf_fenced = hdf_fenced.assign(fare_times_age=lambda Fare, Age: Fare * Age) Even though the result is kinda pointless, it will work :-) Keep in mind that the *return type* , that is, the column type of the new column, will be the same as the first column used (\ ``Fare``\ , in the example). What if you want to return something of a *different* type?! No worries! You only need to *wrap* your function with the desired return type. An example should make this more clear: .. code-block:: python from pyspark.sql.types import StringType hdf_fenced = hdf_fenced.assign(str_fare=StringType.ret(lambda Fare: Fare.map('${:,.2f}'.format))) hdf_fenced.cols['str_fare'][:5] .. 
code-block:: 0 $65.66 1 $53.10 2 $26.55 3 $65.66 4 $65.66 Name: str_fare, dtype: object Basically, we imported the desired output type - *StringType* - and used its extended method *ret* to wrap our ``lambda`` function that formats our numeric ``Fare`` column into a string. It is also possible to create a more complex type, like an array of doubles: .. code-block:: python from pyspark.sql.types import ArrayType, DoubleType def make_list(Fare): return Fare.apply(lambda v: [v, v*2]) hdf_fenced = hdf_fenced.assign(fare_list=ArrayType(DoubleType()).ret(make_list)) hdf_fenced.cols['fare_list'][:5] .. code-block:: 0 [7.25, 14.5] 1 [71.2833, 142.5666] 2 [7.925, 15.85] 3 [53.1, 106.2] 4 [8.05, 16.1] Name: fare_list, dtype: object OK, so, what happened here? #. First, we imported the necessary types, *ArrayType* and *DoubleType* , since we are building a function that returns a list of doubles. #. We actually built the function - notice that we call *apply* straight from *Fare* , which is treated as a pandas Series under the hood. #. We *wrap* the function with the return type ``ArrayType(DoubleType())`` by invoking the extended method ``ret``. #. Finally, we assign it to a new column name, and that's it! Nicer exceptions ^^^^^^^^^^^^^^^^ Now, suppose you make a mistake while creating your function... if you have used Spark for a while, you have already realized that, when an exception is raised, it will be *loooong* , right? To help you with that, *HandySpark* analyzes the error message and parses it nicely for you at the very *top* of the error message, in *bold red* : .. image:: /images/handy_exception.png :target: /images/handy_exception.png :alt: exception Safety first ^^^^^^^^^^^^ *HandySpark* wants to protect your cluster and network, so it implements a *safety* whenever you perform an operation that is going to retrieve *ALL* data from your *HandyFrame* , like ``collect`` or ``toPandas``. How does that work?
Every time a *HandyFrame* has one of these methods called, it will output up to the *safety limit* , which has a default of *1,000 elements*. .. image:: /images/safety_on.png :target: /images/safety_on.png :alt: safety on Do you want to set a different safety limit for your *HandyFrame* ? .. image:: /images/safety_limit.png :target: /images/safety_limit.png :alt: safety limit What if you want to retrieve everything nonetheless?! You can invoke the *safety_off* method prior to the actual method you want to call and you get a *one-time* unlimited result. .. image:: /images/safety_off.png :target: /images/safety_off.png :alt: safety off Don't feel like Handy anymore? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To get back your original Spark dataframe, you only need to call *notHandy* to make it not handy again: .. code-block:: python hdf_fenced.notHandy() .. code-block:: DataFrame[PassengerId: int, Survived: int, Pclass: int, Name: string, Sex: string, Age: double, SibSp: int, Parch: int, Ticket: string, Fare: double, Cabin: string, Embarked: string, logFare: double, is_c_or_q: boolean] Comments, questions, suggestions, bugs -------------------------------------- *DISCLAIMER* : this is a project *under development* , so it is likely you'll run into bugs/problems. So, if you find any bugs/problems, please open an `issue <https://github.com/dvgodoy/handyspark/issues>`_ or submit a `pull request <https://github.com/dvgodoy/handyspark/pulls>`_. ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = HandySpark SOURCEDIR = source BUILDDIR = ../../handyspark-docs # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/source/conf.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # # HandySpark documentation build configuration file, created by # sphinx-quickstart on Sun Oct 28 17:42:51 2018. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('../..')) sys.setrecursionlimit(1500) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.githubpages', 'sphinx.ext.napoleon'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # General information about the project. 
project = 'HandySpark' copyright = '2018, Daniel Voigt Godoy' author = 'Daniel Voigt Godoy' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '0.0.1' # The full version, including alpha/beta/rc tags. release = '0.0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. 
# # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { '**': [ 'relations.html', # needs 'show_related': True theme option to display 'searchbox.html', ] } # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'HandySparkdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'HandySpark.tex', 'HandySpark Documentation', 'Daniel Voigt Godoy', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'handyspark', 'HandySpark Documentation', [author], 1) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'HandySpark', 'HandySpark Documentation', author, 'HandySpark', 'One line description of project.', 'Miscellaneous'), ] # -- Options for Epub output ---------------------------------------------- # Bibliographic Dublin Core info. epub_title = project epub_author = author epub_publisher = author epub_copyright = copyright # The unique identifier of the text. This can be a ISBN number # or the project homepage. 
# # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'https://docs.python.org/': None} ================================================ FILE: docs/source/handyspark.extensions.rst ================================================ handyspark\.extensions package ============================== Submodules ---------- handyspark\.extensions\.common module ------------------------------------- .. automodule:: handyspark.extensions.common :members: :undoc-members: :show-inheritance: handyspark\.extensions\.evaluation module ----------------------------------------- .. automodule:: handyspark.extensions.evaluation :members: :undoc-members: :show-inheritance: handyspark\.extensions\.types module ------------------------------------ .. automodule:: handyspark.extensions.types :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: handyspark.extensions :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/handyspark.ml.rst ================================================ handyspark\.ml package ====================== Submodules ---------- handyspark\.ml\.base module --------------------------- .. automodule:: handyspark.ml.base :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: handyspark.ml :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/handyspark.rst ================================================ handyspark package ================== Subpackages ----------- .. toctree:: handyspark.extensions handyspark.ml handyspark.sql Submodules ---------- handyspark\.plot module ----------------------- .. 
automodule:: handyspark.plot :members: :undoc-members: :show-inheritance: handyspark\.stats module ------------------------ .. automodule:: handyspark.stats :members: :undoc-members: :show-inheritance: handyspark\.util module ----------------------- .. automodule:: handyspark.util :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: handyspark :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/handyspark.sql.rst ================================================ handyspark\.sql package ======================= Submodules ---------- handyspark\.sql\.dataframe module --------------------------------- .. automodule:: handyspark.sql.dataframe :members: :undoc-members: :show-inheritance: handyspark\.sql\.datetime module -------------------------------- .. automodule:: handyspark.sql.datetime :members: :undoc-members: :show-inheritance: handyspark\.sql\.pandas module ------------------------------ .. automodule:: handyspark.sql.pandas :members: :undoc-members: :show-inheritance: handyspark\.sql\.schema module ------------------------------ .. automodule:: handyspark.sql.schema :members: :undoc-members: :show-inheritance: handyspark\.sql\.string module ------------------------------ .. automodule:: handyspark.sql.string :members: :undoc-members: :show-inheritance: handyspark\.sql\.transform module --------------------------------- .. automodule:: handyspark.sql.transform :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: handyspark.sql :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/includeme.rst ================================================ .. include:: ../../README.rst ================================================ FILE: docs/source/index.rst ================================================ .. 
HandySpark documentation master file, created by sphinx-quickstart on Sun Oct 28 17:42:51 2018. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to HandySpark's documentation! ====================================== .. toctree:: :maxdepth: 2 includeme Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/source/modules.rst ================================================ handyspark ========== .. toctree:: :maxdepth: 4 handyspark ================================================ FILE: handyspark/__init__.py ================================================ from handyspark.extensions.evaluation import BinaryClassificationMetrics from handyspark.sql import HandyFrame, Bucket, Quantile, DataFrame __all__ = [ 'HandyFrame', 'Bucket', 'Quantile', 'BinaryClassificationMetrics' ] ================================================ FILE: handyspark/extensions/__init__.py ================================================ from handyspark.extensions.common import JavaModelWrapper from handyspark.extensions.evaluation import BinaryClassificationMetrics from handyspark.extensions.types import AtomicType __all__ = [ 'BinaryClassificationMetrics' ] ================================================ FILE: handyspark/extensions/common.py ================================================ from pyspark.mllib.common import _java2py, _py2java, JavaModelWrapper def call2(self, name, *a): """Another call method for JavaModelWrapper. This method should be used whenever the JavaModel returns a Scala Tuple that needs to be deserialized before converted to Python. 
""" serde = self._sc._jvm.org.apache.spark.mllib.api.python.SerDe args = [_py2java(self._sc, a) for a in a] java_res = getattr(self._java_model, name)(*args) java_res = serde.fromTuple2RDD(java_res) res = _java2py(self._sc, java_res) return res JavaModelWrapper.call2 = call2 ================================================ FILE: handyspark/extensions/evaluation.py ================================================ import pandas as pd from operator import itemgetter from handyspark.plot import roc_curve, pr_curve from pyspark.mllib.evaluation import BinaryClassificationMetrics, MulticlassMetrics from pyspark.sql import SQLContext, DataFrame, functions as F from pyspark.sql.types import StructField, StructType, DoubleType def thresholds(self): """ * Returns thresholds in descending order. """ return self.call('thresholds') def roc(self): """Calls the `roc` method from the Java class * Returns the receiver operating characteristic (ROC) curve, * which is an RDD of (false positive rate, true positive rate) * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it. * @see * Receiver operating characteristic (Wikipedia) """ return self.call2('roc') def pr(self): """Calls the `pr` method from the Java class * Returns the precision-recall curve, which is an RDD of (recall, precision), * NOT (precision, recall), with (0.0, p) prepended to it, where p is the precision * associated with the lowest recall on the curve. * @see * Precision and recall (Wikipedia) """ return self.call2('pr') def fMeasureByThreshold(self, beta=1.0): """Calls the `fMeasureByThreshold` method from the Java class * Returns the (threshold, F-Measure) curve. * @param beta the beta factor in F-Measure computation. * @return an RDD of (threshold, F-Measure) pairs. * @see F1 score (Wikipedia) """ return self.call2('fMeasureByThreshold', beta) def precisionByThreshold(self): """Calls the `precisionByThreshold` method from the Java class * Returns the (threshold, precision) curve. 
""" return self.call2('precisionByThreshold') def recallByThreshold(self): """Calls the `recallByThreshold` method from the Java class * Returns the (threshold, recall) curve. """ return self.call2('recallByThreshold') def getMetricsByThreshold(self): """Returns DataFrame containing all metrics (FPR, Recall and Precision) for every threshold. Returns ------- metrics: DataFrame """ thresholds = self.call('thresholds').collect() roc = self.call2('roc').collect()[1:-1] pr = self.call2('pr').collect()[1:] metrics = list(zip(thresholds, map(itemgetter(0), roc), map(itemgetter(1), roc), map(itemgetter(1), pr))) metrics += [(0., 1., 1., 0.)] sql_ctx = SQLContext.getOrCreate(self._sc) df = sql_ctx.createDataFrame(metrics).toDF('threshold', 'fpr', 'recall', 'precision') return df def confusionMatrix(self, threshold=0.5): """Returns confusion matrix: predicted classes are in columns, they are ordered by class label ascending, as in "labels". Predicted classes are computed according to informed threshold. Parameters ---------- threshold: double, optional Threshold probability for the positive class. Default is 0.5. Returns ------- confusionMatrix: DenseMatrix """ scoreAndLabels = self.call2('scoreAndLabels').map(lambda t: (float(t[0] > threshold), t[1])) mcm = MulticlassMetrics(scoreAndLabels) return mcm.confusionMatrix() def print_confusion_matrix(self, threshold=0.5): """Returns confusion matrix: predicted classes are in columns, they are ordered by class label ascending, as in "labels". Predicted classes are computed according to informed threshold. Parameters ---------- threshold: double, optional Threshold probability for the positive class. Default is 0.5. 
Returns ------- confusionMatrix: pd.DataFrame """ cm = self.confusionMatrix(threshold).toArray() df = pd.concat([pd.DataFrame(cm)], keys=['Actual'], names=[]) df.columns = pd.MultiIndex.from_product([['Predicted'], df.columns]) return df def plot_roc_curve(self, ax=None): """Makes a plot of Receiver Operating Characteristic (ROC) curve. Parameter --------- ax : matplotlib axes object, default None """ metrics = self.getMetricsByThreshold().toPandas() return roc_curve(metrics.fpr, metrics.recall, self.areaUnderROC, ax) def plot_pr_curve(self, ax=None): """Makes a plot of Precision-Recall (PR) curve. Parameter --------- ax : matplotlib axes object, default None """ metrics = self.getMetricsByThreshold().toPandas() return pr_curve(metrics.precision, metrics.recall, self.areaUnderPR, ax) def __init__(self, scoreAndLabels, scoreCol='score', labelCol='label'): if isinstance(scoreAndLabels, DataFrame): scoreAndLabels = (scoreAndLabels .select(scoreCol, labelCol) .rdd.map(lambda row:(float(row[scoreCol][1]), float(row[labelCol])))) sc = scoreAndLabels.ctx sql_ctx = SQLContext.getOrCreate(sc) df = sql_ctx.createDataFrame(scoreAndLabels, schema=StructType([ StructField("score", DoubleType(), nullable=False), StructField("label", DoubleType(), nullable=False)])) java_class = sc._jvm.org.apache.spark.mllib.evaluation.BinaryClassificationMetrics java_model = java_class(df._jdf) super(BinaryClassificationMetrics, self).__init__(java_model) BinaryClassificationMetrics.__init__ = __init__ BinaryClassificationMetrics.thresholds = thresholds BinaryClassificationMetrics.roc = roc BinaryClassificationMetrics.pr = pr BinaryClassificationMetrics.fMeasureByThreshold = fMeasureByThreshold BinaryClassificationMetrics.precisionByThreshold = precisionByThreshold BinaryClassificationMetrics.recallByThreshold = recallByThreshold BinaryClassificationMetrics.getMetricsByThreshold = getMetricsByThreshold BinaryClassificationMetrics.confusionMatrix = confusionMatrix 
BinaryClassificationMetrics.plot_roc_curve = plot_roc_curve BinaryClassificationMetrics.plot_pr_curve = plot_pr_curve BinaryClassificationMetrics.print_confusion_matrix = print_confusion_matrix ================================================ FILE: handyspark/extensions/types.py ================================================ from pyspark.sql.types import AtomicType, ArrayType, MapType @classmethod def ret(cls, expr): """Assigns a return type to the expression when used inside an `assign` method. """ return expr, cls.typeName() AtomicType.ret = ret def ret(self, expr): """Assigns a return type to the expression when used inside an `assign` method. """ return expr, self.simpleString() ArrayType.ret = ret MapType.ret = ret ================================================ FILE: handyspark/ml/__init__.py ================================================ from handyspark.ml.base import HandyFencer, HandyImputer __all__ = [ 'HandyFencer', 'HandyImputer' ] ================================================ FILE: handyspark/ml/base.py ================================================ import json from pyspark.ml.base import Transformer from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable from pyspark.ml.param import * from pyspark.sql import functions as F class HandyTransformers(object): """Generates transformers to be used in pipelines. Available transformers: imputer: Transformer Imputation transformer for completing missing values. fencer: Transformer Fencer transformer for capping outliers according to lower and upper fences. 
""" def __init__(self, df): self._df = df self._handy = df._handy def imputer(self): """ Generates a transformer to impute missing values, using values from the HandyFrame """ return HandyImputer().setDictValues(self._df.statistics_) def fencer(self): """ Generates a transformer to fence outliers, using statistics from the HandyFrame """ return HandyFencer().setDictValues(self._df.fences_) class HasDict(Params): """Mixin for a Dictionary parameter. It dumps the dictionary into a JSON string for storage and reloads it whenever needed. """ dictValues = Param(Params._dummy(), "dictValues", "Dictionary values", typeConverter=TypeConverters.toString) def __init__(self): super(HasDict, self).__init__() self._setDefault(dictValues='{}') def setDictValues(self, value): """ Sets the value of :py:attr:`dictValues`. """ if isinstance(value, dict): value = json.dumps(value).replace('\'', '"') return self._set(dictValues=value) def getDictValues(self): """ Gets the value of dictValues or its default value. """ values = self.getOrDefault(self.dictValues) return json.loads(values) class HandyImputer(Transformer, HasDict, DefaultParamsReadable, DefaultParamsWritable): """Imputation transformer for completing missing values. Attributes ---------- statistics : dict The imputation fill value for each feature. If stratified, first level keys are filter clauses for stratification. """ def _transform(self, dataset): # Loads dictionary with values for imputation fillingValues = self.getDictValues() items = fillingValues.items() target = dataset # Loops over columns... 
for colname, v in items: # If value is another dictionary, it means we're dealing with # stratified imputation - the key is the filering clause # and its value is going to be used for imputation if isinstance(v, dict): clauses = v.keys() whens = ' '.join(['WHEN (({clause}) AND (isnan({col}) OR isnull({col}))) THEN {quote}{filling}{quote}' .format(clause=clause, col=colname, filling=v[clause], quote='"' if isinstance(v[clause], str) else '') for clause in clauses]) # Otherwise uses the non-stratified dictionary to fill the values else: whens = ('WHEN (isnan({col}) OR isnull({col})) THEN {quote}{filling}{quote}' .format(col=colname, filling=v, quote='"' if isinstance(v, str) else '')) expression = F.expr('CASE {expr} ELSE {col} END'.format(expr=whens, col=colname)) target = target.withColumn(colname, expression) # If it is a HandyFrame, make it a regular DataFrame try: target = target.notHandy() except AttributeError: pass return target @property def statistics(self): return self.getDictValues() class HandyFencer(Transformer, HasDict, DefaultParamsReadable, DefaultParamsWritable): """Fencer transformer for capping outliers according to lower and upper fences. Attributes ---------- fences : dict The fence values for each feature. If stratified, first level keys are filter clauses for stratification. 
""" def _transform(self, dataset): # Loads dictionary with values for fencing fences = self.getDictValues() items = fences.items() target = dataset for colname, v in items: # If value is another dictionary, it means we're dealing with # stratified imputation - the key is the filering clause # and its value is going to be used for imputation if isinstance(v, dict): clauses = v.keys() whens1 = ' '.join(['WHEN ({clause}) THEN greatest({col}, {fence})'.format(clause=clause, col=colname, fence=v[clause][0]) for clause in clauses]) whens2 = ' '.join(['WHEN ({clause}) THEN least({col}, {fence})'.format(clause=clause, col=colname, fence=v[clause][1]) for clause in clauses]) expression1 = F.expr('CASE {} END'.format(whens1)) expression2 = F.expr('CASE {} END'.format(whens2)) # Otherwise uses the non-stratified dictionary to fill the values else: expression1 = F.expr('greatest({col}, {fence})'.format(col=colname, fence=v[0])) expression2 = F.expr('least({col}, {fence})'.format(col=colname, fence=v[1])) target = target.withColumn(colname, expression1).withColumn(colname, expression2) # If it is a HandyFrame, make it a regular DataFrame try: target = target.notHandy() except AttributeError: pass return target @property def fences(self): return self.getDictValues() ================================================ FILE: handyspark/plot.py ================================================ import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sns from inspect import signature from handyspark.util import get_buckets, none2zero, ensure_list from operator import add, itemgetter from pyspark.ml.feature import Bucketizer from pyspark.ml.pipeline import Pipeline from pyspark.sql import functions as F from matplotlib.artist import setp import matplotlib as mpl mpl.rc("lines", markeredgewidth=0.5) def title_fom_clause(clause): return clause.replace(' and ', '\n').replace(' == ', '=').replace('"', '') def consolidate_plots(fig, axs, title, clauses): 
axs[0].set_title(title) fig.tight_layout() if len(axs) > 1: assert len(axs) == len(clauses), 'Mismatched number of plots and clauses!' xlim = list(map(lambda ax: ax.get_xlim(), axs)) xlim = [np.min(list(map(itemgetter(0), xlim))), np.max(list(map(itemgetter(1), xlim)))] ylim = list(map(lambda ax: ax.get_ylim(), axs)) ylim = [np.min(list(map(itemgetter(0), ylim))), np.max(list(map(itemgetter(1), ylim)))] for i, ax in enumerate(axs): subtitle = title_fom_clause(clauses[i]) ax.set_title(subtitle, fontdict={'fontsize': 10}) ax.set_xlim(xlim) ax.set_ylim(ylim) #if ax.colNum > 0: # ax.get_yaxis().set_visible(False) #if ax.rowNum < (ax.numRows - 1): # ax.get_xaxis().set_visible(False) if isinstance(title, list): title = ', '.join(title) fig.suptitle(title) fig.tight_layout() fig.subplots_adjust(top=0.85) return fig, axs ### Correlations def plot_correlations(pdf, ax=None): if ax is None: fig, ax = plt.subplots(1, 1) return sns.heatmap(round(pdf,2), annot=True, cmap="coolwarm", fmt='.2f', linewidths=.05, ax=ax) ### Scatterplot def strat_scatterplot(sdf, col1, col2, n=30): stages = [] for col in [col1, col2]: splits = np.linspace(*sdf.agg(F.min(col), F.max(col)).rdd.map(tuple).collect()[0], n + 1) bucket_name = '__{}_bucket'.format(col) stages.append(Bucketizer(splits=splits, inputCol=col, outputCol=bucket_name, handleInvalid="skip")) pipeline = Pipeline(stages=stages) model = pipeline.fit(sdf) return model, sdf.count() def scatterplot(sdf, col1, col2, n=30, ax=None): strat_ax, data = sdf._get_strata() if data is None: data = strat_scatterplot(sdf, col1, col2, n) else: ax = strat_ax model, total = data if ax is None: fig, ax = plt.subplots(1, 1) axes = ensure_list(ax) clauses = sdf._handy._strata_raw_clauses if not len(clauses): clauses = [None] bucket_name1, bucket_name2 = '__{}_bucket'.format(col1), '__{}_bucket'.format(col2) strata = sdf._handy.strata_colnames colnames = strata + [bucket_name1, bucket_name2] result = 
model.transform(sdf).select(colnames).groupby(colnames).agg(F.count('*').alias('count')).toPandas().sort_values(by=colnames) splits = [bucket.getSplits() for bucket in model.stages] splits = [list(map(np.mean, zip(split[1:], split[:-1]))) for split in splits] splits1 = pd.DataFrame({bucket_name1: np.arange(0, n), col1: splits[0]}) splits2 = pd.DataFrame({bucket_name2: np.arange(0, n), col2: splits[1]}) df_counts = result.merge(splits1).merge(splits2)[strata + [col1, col2, 'count']].rename(columns={'count': 'Proportion'}) df_counts.loc[:, 'Proportion'] = df_counts.Proportion.apply(lambda p: round(p / total, 4)) for ax, clause in zip(axes, clauses): data = df_counts if clause is not None: data = data.query(clause) sns.scatterplot(data=data, x=col1, y=col2, size='Proportion', ax=ax, legend=False) if len(axes) == 1: axes = axes[0] return axes ### Histogram def strat_histogram(sdf, colname, bins=10, categorical=False): if categorical: result = sdf.cols[colname]._value_counts(dropna=False, raw=True) if hasattr(result.index, 'levels'): indexes = pd.MultiIndex.from_product(result.index.levels[:-1] + [result.reset_index()[colname].unique().tolist()], names=result.index.names) result = (pd.DataFrame(index=indexes) .join(result.to_frame(), how='left') .fillna(0)[result.name] .astype(result.dtype)) start_values = result.index.tolist() else: bucket_name = '__{}_bucket'.format(colname) strata = sdf._handy.strata_colnames colnames = strata + ensure_list(bucket_name) start_values = np.linspace(*sdf.agg(F.min(colname), F.max(colname)).rdd.map(tuple).collect()[0], bins + 1) bucketizer = Bucketizer(splits=start_values, inputCol=colname, outputCol=bucket_name, handleInvalid="skip") result = (bucketizer .transform(sdf) .select(colnames) .groupby(colnames) .agg(F.count('*').alias('count')) .toPandas() .sort_values(by=colnames)) indexes = pd.DataFrame({bucket_name: np.arange(0, bins), 'bucket': start_values[:-1]}) if len(strata): indexes = (indexes .assign(key=1) 
.merge(result[strata].drop_duplicates().assign(key=1), on='key') .drop(columns=['key'])) result = indexes.merge(result, how='left', on=strata + [bucket_name]).fillna(0)[strata + [bucket_name, 'count']] return start_values, result def histogram(sdf, colname, bins=10, categorical=False, ax=None): strat_ax, data = sdf._get_strata() if data is None: data = strat_histogram(sdf, colname, bins, categorical) else: ax = strat_ax start_values, counts = data if ax is None: fig, ax = plt.subplots(1, 1) axes = ensure_list(ax) clauses = sdf._handy._strata_raw_clauses if not len(clauses): clauses = [None] for ax, clause in zip(axes, clauses): if categorical: pdf = counts.sort_index().to_frame() if clause is not None: pdf = pdf.query(clause).reset_index(sdf._handy.strata_colnames).drop(columns=sdf._handy.strata_colnames) pdf.iloc[:bins].plot(kind='bar', color='C0', legend=False, rot=0, ax=ax, title=colname) else: mid_point_bins = start_values[:-1] weights = counts if clause is not None: weights = counts.query(clause) ax.hist(mid_point_bins, bins=start_values, weights=weights['count'].values) ax.set_title(colname) if len(axes) == 1: axes = axes[0] return axes ### Boxplot def _gen_dict(rc_name, properties): """ Loads properties in the dictionary from rc file if not already in the dictionary""" rc_str = 'boxplot.{0}.{1}' dictionary = dict() for prop_dict in properties: dictionary.setdefault(prop_dict, plt.rcParams[rc_str.format(rc_name, prop_dict)]) return dictionary def draw_boxplot(ax, stats): flier_props = ['color', 'marker', 'markerfacecolor', 'markeredgecolor', 'markersize', 'linestyle', 'linewidth'] default_props = ['color', 'linewidth', 'linestyle'] boxprops = _gen_dict('boxprops', default_props) whiskerprops = _gen_dict('whiskerprops', default_props) capprops = _gen_dict('capprops', default_props) medianprops = _gen_dict('medianprops', default_props) meanprops = _gen_dict('meanprops', default_props) flierprops = _gen_dict('flierprops', flier_props) props = 
dict(boxprops=boxprops, flierprops=flierprops, medianprops=medianprops, meanprops=meanprops, capprops=capprops, whiskerprops=whiskerprops) colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4'] bp = ax.bxp(stats, **props) ax.grid(True) setp(bp['boxes'], color=colors[0], alpha=1) setp(bp['whiskers'], color=colors[0], alpha=1) setp(bp['medians'], color=colors[2], alpha=1) return ax def boxplot(sdf, colnames, ax=None, showfliers=True, k=1.5, precision=.0001): strat_ax, data = sdf._get_strata() if data is None: if ax is None: fig, ax = plt.subplots(1, 1) title_clauses = sdf._handy._strata_clauses if not len(title_clauses): title_clauses = [None] pdf = sdf._handy._calc_fences(colnames, k, precision) stats = [] for colname in colnames: items, _, _ = sdf._handy._calc_bxp_stats(pdf, colname, showfliers=showfliers) for title_clause, item in zip(title_clauses, items): name = colname if len(colnames) > 1 else (title_fom_clause(title_clause) if title_clause is not None else colname) item.update({'label': name}) # each list of items corresponds to a different column stats.append(items) # Stats is a list of columns, containing each a list of clauses if ax is not None: if title_clauses[0] is None: if len(colnames) == 1: stats = stats[0] else: stats = np.squeeze(stats).tolist() return draw_boxplot(ax, stats) else: if len(strat_ax) > 1: stats = [[stats[j][i] for j in range(len(stats))] for i in range(len(title_clauses))] return stats def post_boxplot(axs, stats): new_res = [] for ax, stat in zip(axs, stats): ax = draw_boxplot(ax, stat) new_res.append(ax) return new_res def roc_curve(fpr, tpr, roc_auc, ax=None): if ax is None: fig, ax = plt.subplots(1, 1) ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.4f)' % roc_auc) ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') ax.set_xlim([0.0, 1.0]) ax.set_ylim([0.0, 1.05]) ax.set_xlabel('False Positive Rate') ax.set_ylabel('True 
Positive Rate') ax.set_title('Receiver Operating Characteristic Curve') ax.legend(loc="lower right") return ax def pr_curve(precision, recall, pr_auc, ax=None): if ax is None: fig, ax = plt.subplots(1, 1) # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument step_kwargs = ({'step': 'post'} if 'step' in signature(plt.fill_between).parameters else {}) ax.step(recall, precision, color='b', alpha=0.2, where='post', label='PR curve (area = %0.4f)' % pr_auc) ax.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs) ax.set_xlabel('Recall') ax.set_ylabel('Precision') ax.set_ylim([0.0, 1.05]) ax.set_xlim([0.0, 1.0]) ax.legend(loc="lower left") ax.set_title('Precision-Recall Curve') return ax ================================================ FILE: handyspark/sql/__init__.py ================================================ from handyspark.sql.dataframe import HandyFrame, Bucket, Quantile, DataFrame from handyspark.sql.schema import generate_schema __all__ = [ 'HandyFrame', 'Bucket', 'Quantile', 'generate_schema' ] ================================================ FILE: handyspark/sql/dataframe.py ================================================ from copy import deepcopy from handyspark.ml.base import HandyTransformers from handyspark.plot import histogram, boxplot, scatterplot, strat_scatterplot, strat_histogram,\ consolidate_plots, post_boxplot from handyspark.sql.pandas import HandyPandas from handyspark.sql.transform import _MAPPING, HandyTransform from handyspark.util import HandyException, dense_to_array, disassemble, ensure_list, check_columns, \ none2default import inspect from matplotlib.axes import Axes from collections import OrderedDict import matplotlib.pyplot as plt import numpy as np from operator import itemgetter, add import pandas as pd from pyspark.ml.stat import Correlation from pyspark.ml.feature import Bucketizer from pyspark.mllib.stat import Statistics from pyspark.sql import DataFrame, GroupedData, Window, functions as F, 
Column, Row
from pyspark.ml.feature import VectorAssembler, StandardScaler, PCA
from pyspark.ml.pipeline import Pipeline
from scipy.stats import chi2
from scipy.linalg import inv


def toHandy(self):
    """Converts Spark DataFrame into HandyFrame.
    """
    return HandyFrame(self)


def notHandy(self):
    """Returns the DataFrame itself (it already is a regular DataFrame)."""
    return self


# Makes both conversions available on every Spark DataFrame
DataFrame.toHandy = toHandy
DataFrame.notHandy = notHandy


def agg(f):
    """Decorator: tags `f` with the `__is_agg` marker attribute."""
    f.__is_agg = True
    return f


def inccol(f):
    """Decorator: tags `f` with the `__is_inccol` marker attribute."""
    f.__is_inccol = True
    return f


class Handy(object):
    def __init__(self, df):
        self._df = df

        # classification
        self._is_classification = False
        self._nclasses = None
        self._classes = None
        # The `response` property reads this attribute; without a default it
        # raised AttributeError until `set_response` had been called
        self._response = None

        # transformers
        self._imputed_values = {}
        self._fenced_values = {}

        # groups / strata
        self._group_cols = None
        self._strata = None
        self._strata_object = None
        self._strata_plot = None

        self._clear_stratification()
        self._safety_limit = 1000
        self._safety = True

        self._update_types()

    def __deepcopy__(self, memo):
        """Deep-copies every attribute except `_df`, `_strata_object` and `_strata_plot`.

        Those three attributes are left unset on the copy.
        """
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            if k not in ['_df', '_strata_object', '_strata_plot']:
                setattr(result, k, deepcopy(v, memo))
        return result

    def __getitem__(self, *args):
        """Fetches the values of one column as a pandas Series.

        Accepts `item` or `(item, n)`: `item` is a column name or position
        (positions skip the grouping columns), `n` is the number of rows to
        fetch (default 20; None fetches all rows).
        """
        if isinstance(args[0], tuple):
            args = args[0]
        item = args[0]
        n = 20
        if len(args) > 1:
            n = args[1]
            if n is None:
                n = -1

        if isinstance(item, int):
            idx = item + (len(self._group_cols) if self._group_cols is not None else 0)
            assert idx < len(self._df.columns), "Invalid column index {}".format(idx)
            item = list(self._df.columns)[idx]

        if isinstance(item, str):
            if self._group_cols is None or len(self._group_cols) == 0:
                res = self._take_array(item, n)
                if res.ndim > 1:
                    res = res.tolist()
                res = pd.Series(res, name=item)
                if self._strata is not None:
                    strata = list(map(lambda v: v[1].to_dict(), self.strata.iterrows()))
                    # Strata are only attached when there is one value per combination
                    if len(strata) == len(res):
                        res = pd.concat([pd.DataFrame(strata), res], axis=1).set_index(self._strata).sort_index()
                return res
            else:
                check_columns(self._df, list(self._group_cols) + [item])
                pdf = self._df.notHandy().select(list(self._group_cols) + [item])
                if n != -1:
                    pdf = pdf.limit(n)
                res = pdf.toPandas().set_index(list(self._group_cols)).sort_index()[item]
                return res

    @property
    def stages(self):
        # Counts the lines of the RDD debug string starting with '+', plus one
        return (len(list(filter(lambda v: '+' == v,
                                map(lambda s: s.strip()[0],
                                    self._df.rdd.toDebugString().decode().split('\n'))))) + 1)

    @property
    def statistics_(self):
        return self._imputed_values

    @property
    def fences_(self):
        return self._fenced_values

    @property
    def is_classification(self):
        return self._is_classification

    @property
    def classes(self):
        return self._classes

    @property
    def nclasses(self):
        return self._nclasses

    @property
    def response(self):
        return self._response

    @property
    def ncols(self):
        return len(self._types)

    @property
    def nrows(self):
        return self._df.count()

    @property
    def shape(self):
        return (self.nrows, self.ncols)

    @property
    def strata(self):
        # Implicitly None when no stratification is set
        if self._strata is not None:
            return pd.DataFrame(data=self._strata_combinations, columns=self._strata)

    @property
    def strata_colnames(self):
        if self._strata is not None:
            return list(map(str, ensure_list(self._strata)))
        else:
            return []

    def _stratify(self, strata):
        return HandyStrata(self, strata)

    def _clear_stratification(self):
        """Resets every stratification attribute to its 'not stratified' value."""
        self._strata = None
        self._strata_object = None
        self._strata_plot = None
        self._strata_combinations = []
        self._strata_raw_combinations = []
        self._strata_clauses = []
        self._strata_raw_clauses = []
        self._n_cols = 1
        self._n_rows = 1

    def _set_stratification(self, strata, raw_combinations, raw_clauses, combinations, clauses):
        if strata is not None:
            assert len(combinations[0]) == len(strata), "Mismatched number of combinations and strata!"
self._strata = strata self._strata_raw_combinations = raw_combinations self._strata_raw_clauses = raw_clauses self._strata_combinations = combinations self._strata_clauses = clauses self._n_cols = len(set(map(itemgetter(0), combinations))) try: self._n_rows = len(set(map(itemgetter(1), combinations))) except IndexError: self._n_rows = 1 def _build_strat_plot(self, n_rows, n_cols, **kwargs): fig, axs = plt.subplots(n_rows, n_cols, **kwargs) if n_rows == 1: axs = [axs] if n_cols == 1: axs = [axs] self._strata_plot = (fig, [ax for col in np.transpose(axs) for ax in col]) def _update_types(self): self._types = list(map(lambda t: (t.name, t.dataType.typeName()), self._df.schema.fields)) self._numerical = list(map(itemgetter(0), filter(lambda t: t[1] in ['byte', 'short', 'integer', 'long', 'float', 'double'], self._types))) self._continuous = list(map(itemgetter(0), filter(lambda t: t[1] in ['double', 'float'], self._types))) self._categorical = list(map(itemgetter(0), filter(lambda t: t[1] in ['byte', 'short', 'integer', 'long', 'boolan', 'string'], self._types))) self._array = list(map(itemgetter(0), filter(lambda t: t[1] in ['array', 'map'], self._types))) self._string = list(map(itemgetter(0), filter(lambda t: t[1] in ['string'], self._types))) def _take_array(self, colname, n): check_columns(self._df, colname) datatype = self._df.notHandy().select(colname).schema.fields[0].dataType.typeName() rdd = self._df.notHandy().select(colname).rdd.map(itemgetter(0)) if n == -1: data = rdd.collect() else: data = rdd.take(n) return np.array(data, dtype=_MAPPING.get(datatype, 'object')) def _value_counts(self, colnames, dropna=True, raw=False): colnames = ensure_list(colnames) strata = self.strata_colnames colnames = strata + colnames check_columns(self._df, colnames) data = self._df.notHandy().select(colnames) if dropna: data = data.dropna() values = (data.groupby(colnames).agg(F.count('*').alias('value_counts')) .toPandas().set_index(colnames).sort_index()['value_counts']) if 
not raw: for level, col in enumerate(ensure_list(self._strata)): if not isinstance(col, str): values.index.set_levels(pd.Index(col._clauses[1:-1]), level=level, inplace=True) values.index.set_names(col.colname, level=level, inplace=True) return values def _fillna(self, target, values): assert isinstance(target, DataFrame), "Target must be a DataFrame" items = values.items() for colname, v in items: if isinstance(v, dict): clauses = v.keys() whens = ' '.join(['WHEN (({clause}) AND (isnan({col}) OR isnull({col}))) THEN {quote}{filling}{quote}' .format(clause=clause, col=colname, filling=v[clause], quote='"' if isinstance(v[clause], str) else '') for clause in clauses]) else: whens = ('WHEN (isnan({col}) OR isnull({col})) THEN {quote}{filling}{quote}' .format(col=colname, filling=v, quote='"' if isinstance(v, str) else '')) expression = F.expr('CASE {expr} ELSE {col} END'.format(expr=whens, col=colname)) target = target.withColumn(colname, expression) return target def __stat_to_dict(self, colname, stat): if len(self._strata_clauses): if isinstance(stat, pd.Series): stat = stat.to_frame(colname) return {clause: stat.query(raw_clause)[colname].iloc[0] for clause, raw_clause in zip(self._strata_clauses, self._strata_raw_clauses)} else: return stat[colname] def _fill_values(self, continuous, categorical, strategy): values = {} colnames = list(map(itemgetter(0), filter(lambda t: t[1] == 'mean', zip(continuous, strategy)))) values.update(dict([(col, self.__stat_to_dict(col, self.mean(col))) for col in colnames])) colnames = list(map(itemgetter(0), filter(lambda t: t[1] == 'median', zip(continuous, strategy)))) values.update(dict([(col, self.__stat_to_dict(col, self.median(col))) for col in colnames])) values.update(dict([(col, self.__stat_to_dict(col, self.mode(col))) for col in categorical if col in self._categorical])) return values def __fill_self(self, continuous, categorical, strategy): continuous = ensure_list(continuous) categorical = ensure_list(categorical) 
check_columns(self._df, continuous + categorical) strategy = none2default(strategy, 'mean') if continuous == ['all']: continuous = self._continuous if categorical == ['all']: categorical = self._categorical if isinstance(strategy, (list, tuple)): assert len(continuous) == len(strategy), "There must be a strategy to each column." else: strategy = [strategy] * len(continuous) values = self._fill_values(continuous, categorical, strategy) self._imputed_values.update(values) res = HandyFrame(self._fillna(self._df, values), self) return res def _dense_to_array(self, colname, array_colname): check_columns(self._df, colname) res = dense_to_array(self._df.notHandy(), colname, array_colname) return HandyFrame(res, self) def _agg(self, name, func, colnames): colnames = none2default(colnames, self._df.columns) colnames = ensure_list(colnames) check_columns(self._df, self.strata_colnames + [col for col in colnames if not isinstance(col, Column)]) if func is None: func = getattr(F, name) res = (self._df.notHandy() .groupby(self.strata_colnames) .agg(*(func(col).alias(str(col)) for col in colnames if str(col) not in self.strata_colnames)) .toPandas()) if len(res) == 1: res = res.iloc[0] res.name = name return res def _calc_fences(self, colnames, k=1.5, precision=.01): colnames = none2default(colnames, self._numerical) colnames = ensure_list(colnames) check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] strata = self.strata_colnames pdf = (self._df.notHandy() .groupby(strata) .agg(F.count(F.lit(1)).alias('nrows'), *[F.expr('approx_percentile({}, {}, {})'.format(c, q, 1./precision)).alias('{}_{}%'.format(c, int(q * 100))) for q in [.25, .50, .75] for c in colnames], *[F.mean(c).alias('{}_mean'.format(c)) for c in colnames]).toPandas()) for col in colnames: pdf.loc[:, '{}_iqr'.format(col)] = pdf.loc[:, '{}_75%'.format(col)] - pdf.loc[:, '{}_25%'.format(col)] pdf.loc[:, '{}_lfence'.format(col)] = pdf.loc[:, '{}_25%'.format(col)] - k * 
pdf.loc[:, '{}_iqr'.format(col)] pdf.loc[:, '{}_ufence'.format(col)] = pdf.loc[:, '{}_75%'.format(col)] + k * pdf.loc[:, '{}_iqr'.format(col)] return pdf def _calc_mahalanobis_distance(self, colnames, output_col='__mahalanobis'): """Computes Mahalanobis distance from origin """ sdf = self._df.notHandy() check_columns(sdf, colnames) # Builds pipeline to assemble feature columns and scale them assembler = VectorAssembler(inputCols=colnames, outputCol='__features') scaler = StandardScaler(inputCol='__features', outputCol='__scaled', withMean=True) pipeline = Pipeline(stages=[assembler, scaler]) features = pipeline.fit(sdf).transform(sdf) # Computes correlation between features and inverts it # Since we scaled the features, we can assume they have unit variance # and therefore, correlation and covariance matrices are the same! mat = Correlation.corr(features, '__scaled').head()[0].toArray() inv_mat = inv(mat) # Builds Pandas UDF to compute Mahalanobis distance from origin # sqrt((V - 0) * inv_M * (V - 0)) try: import pyarrow @F.pandas_udf('double') def pudf_mult(v): return v.apply(lambda v: np.sqrt(np.dot(np.dot(v, inv_mat), v))) except: @F.udf('double') def pudf_mult(v): return v.apply(lambda v: np.sqrt(np.dot(np.dot(v, inv_mat), v))) # Convert feature vector into array features = dense_to_array(features, '__scaled', '__array_scaled') # Computes Mahalanobis distance and flags as outliers all elements above critical value distance = (features .withColumn('__mahalanobis', pudf_mult('__array_scaled')) .drop('__features', '__scaled', '__array_scaled')) return distance def _set_mahalanobis_outliers(self, colnames, critical_value=.999, input_col='__mahalanobis', output_col='__outlier'): """Compares Mahalanobis distances to critical values using Chi-Squared distribution to identify possible outliers. 
""" distance = self._calc_mahalanobis_distance(colnames) # Computes critical value critical_value = chi2.ppf(critical_value, len(colnames)) # Computes Mahalanobis distance and flags as outliers all elements above critical value outlier = (distance.withColumn(output_col, F.col(input_col) > critical_value)) return outlier def _calc_bxp_stats(self, fences_df, colname, showfliers=False): strata = self.strata_colnames clauses = self._strata_raw_clauses if not len(clauses): clauses = [None] qnames = ['25%', '50%', '75%', 'mean', 'lfence', 'ufence'] col_summ = fences_df[strata + ['{}_{}'.format(colname, q) for q in qnames] + ['nrows']] col_summ.columns = strata + qnames + ['nrows'] if len(strata): col_summ = col_summ.set_index(strata) lfence, ufence = col_summ[['lfence']], col_summ[['ufence']] expression = None for clause in clauses: if clause is not None: partial = F.col(colname).between(lfence.query(clause).iloc[0, 0], ufence.query(clause).iloc[0, 0]) partial &= F.expr(clause) else: partial = F.col(colname).between(lfence.iloc[0, 0], ufence.iloc[0, 0]) if expression is None: expression = partial else: expression |= partial outlier = self._df.notHandy().withColumn('__{}_outlier'.format(colname), ~expression) minmax = (outlier .filter('not __{}_outlier'.format(colname)) .groupby(strata) .agg(F.min(colname).alias('min'), F.max(colname).alias('max')) .toPandas()) if len(strata): minmax = [minmax.query(clause).iloc[0][['min', 'max']].values for clause in clauses] else: minmax = [minmax.iloc[0][['min', 'max']].values] fliers_df = outlier.filter('__{}_outlier'.format(colname)) fliers_df = [fliers_df.filter(clause) for clause in clauses] if len(strata) else [fliers_df] fliers_count = [df.count() for df in fliers_df] if showfliers: fliers = [(df .select(F.abs(F.col(colname)).alias(colname)) .orderBy(F.desc(colname)) .limit(1000) .toPandas()[colname].values) for df in fliers_df] else: fliers = [[]] * len(clauses) stats = [] # each item corresponds to a different clause - all 
items belong to the same column nrows = [] for clause, whiskers, outliers in zip(clauses, minmax, fliers): summary = col_summ if clause is not None: summary = summary.query(clause) item = {'mean': summary['mean'].values[0], 'med': summary['50%'].values[0], 'q1': summary['25%'].values[0], 'q3': summary['75%'].values[0], 'whislo': whiskers[0], 'whishi': whiskers[1], 'fliers': outliers} stats.append(item) nrows.append(summary['nrows'].values[0]) if not len(nrows): nrows = summary['nrows'].values[0] return stats, fliers_count, nrows def set_response(self, colname): check_columns(self._df, colname) self._response = colname if colname is not None: if colname not in self._continuous: self._is_classification = True self._classes = self._df.notHandy().select(colname).rdd.map(itemgetter(0)).distinct().collect() self._nclasses = len(self._classes) return self def disassemble(self, colname, new_colnames=None): check_columns(self._df, colname) res = disassemble(self._df.notHandy(), colname, new_colnames) return HandyFrame(res, self) def to_metrics_RDD(self, prob_col, label): check_columns(self._df, [prob_col, label]) return self.disassemble(prob_col).select('{}_1'.format(prob_col), F.col(label).cast('double')).rdd.map(tuple) def corr(self, colnames=None, method='pearson'): colnames = none2default(colnames, self._numerical) colnames = ensure_list(colnames) check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] if self._strata is not None: colnames = sorted([col for col in colnames if col not in self.strata_colnames]) correlations = Statistics.corr(self._df.notHandy().select(colnames).dropna().rdd.map(lambda row: row[0:]), method=method) pdf = pd.DataFrame(correlations, columns=colnames, index=colnames) return pdf def fill(self, *args, continuous=None, categorical=None, strategy=None): if len(args) and isinstance(args[0], DataFrame): return self._fillna(args[0], self._imputed_values) else: return self.__fill_self(continuous=continuous, 
categorical=categorical, strategy=strategy) @agg def isnull(self, ratio=False): def func(colname): return F.sum(F.isnull(colname).cast('int')).alias(colname) name = 'missing' if ratio: name += '(ratio)' missing = self._agg(name, func, self._df.columns) if ratio: nrows = self._agg('nrows', F.sum, F.lit(1)) if isinstance(missing, pd.Series): missing = missing / nrows["Column"] else: missing.iloc[:, 1:] = missing.iloc[:, 1:].values / nrows["Column"].values.reshape(-1, 1) if len(self.strata_colnames): missing = missing.set_index(self.strata_colnames).T.unstack() missing.name = name return missing @agg def nunique(self, colnames=None): res = self._agg('nunique', F.approx_count_distinct, colnames) if len(self.strata_colnames): res = res.set_index(self.strata_colnames).T.unstack() res.name = 'nunique' return res def outliers(self, colnames=None, ratio=False, method='tukey', **kwargs): colnames = none2default(colnames, self._numerical) colnames = ensure_list(colnames) check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] res = None if method == 'tukey': outliers = [] try: k = float(kwargs['k']) except KeyError: k = 1.5 fences_df = self._calc_fences(colnames, k=k, precision=.01) index = fences_df[self.strata_colnames].set_index(self.strata_colnames).index \ if len(self.strata_colnames) else None for colname in colnames: stats, counts, nrows = self._calc_bxp_stats(fences_df, colname, showfliers=False) outliers.append(pd.Series(counts, index=index, name=colname)) if ratio: outliers[-1] /= nrows res = pd.DataFrame(outliers).unstack() if not len(self.strata_colnames): res = res.droplevel(0) name = 'outliers' if ratio: name += '(ratio)' res.name = name return res def get_outliers(self, colnames=None, critical_value=.999): colnames = none2default(colnames, self._numerical) colnames = ensure_list(colnames) check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] outliers = 
self._set_mahalanobis_outliers(colnames, critical_value) df = outliers.filter('__outlier').orderBy(F.desc('__mahalanobis')).drop('__outlier', '__mahalanobis') return HandyFrame(df, self) def remove_outliers(self, colnames=None, critical_value=.999): colnames = none2default(colnames, self._numerical) colnames = ensure_list(colnames) check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] outliers = self._set_mahalanobis_outliers(colnames, critical_value) df = outliers.filter('not __outlier').drop('__outlier', '__mahalanobis') return HandyFrame(df, self) def fence(self, colnames, k=1.5): colnames = ensure_list(colnames) check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] pdf = self._calc_fences(colnames, k=k) if len(self.strata_colnames): pdf = pdf.set_index(self.strata_colnames) df = self._df.notHandy() for colname in colnames: lfence, ufence = pdf.loc[:, ['{}_lfence'.format(colname)]], pdf.loc[:, ['{}_ufence'.format(colname)]] if len(self._strata_raw_clauses): whens1 = ' '.join(['WHEN ({clause}) THEN greatest({col}, {fence})'.format(clause=clause, col=colname, fence=lfence.query(clause).iloc[0, 0]) for clause in self._strata_raw_clauses]) whens2 = ' '.join(['WHEN ({clause}) THEN least({col}, {fence})'.format(clause=clause, col=colname, fence=ufence.query(clause).iloc[0, 0]) for clause in self._strata_raw_clauses]) expression1 = F.expr('CASE {} END'.format(whens1)) expression2 = F.expr('CASE {} END'.format(whens2)) self._fenced_values.update({colname: {clause: [lfence.query(clause).iloc[0, 0], ufence.query(clause).iloc[0, 0]] for clause in self._strata_clauses}}) else: self._fenced_values.update({colname: [lfence.iloc[0, 0], ufence.iloc[0, 0]]}) expression1 = F.expr('greatest({col}, {fence})'.format(col=colname, fence=lfence.iloc[0, 0])) expression2 = F.expr('least({col}, {fence})'.format(col=colname, fence=ufence.iloc[0, 0])) df = df.withColumn(colname, 
expression1).withColumn(colname, expression2) return HandyFrame(df.select(self._df.columns), self) @inccol def value_counts(self, colnames, dropna=True): return self._value_counts(colnames, dropna) @inccol def mode(self, colname): check_columns(self._df, [colname]) if self._strata is None: values = (self._df.notHandy().select(colname).dropna() .groupby(colname).agg(F.count('*').alias('mode')) .orderBy(F.desc('mode')).limit(1) .toPandas()[colname][0]) return pd.Series(values, index=[colname], name='mode') else: strata = self.strata_colnames colnames = strata + [colname] values = (self._df.notHandy().select(colnames).dropna() .groupby(colnames).agg(F.count('*').alias('mode')) .withColumn('order', F.row_number().over(Window.partitionBy(strata).orderBy(F.desc('mode')))) .filter('order == 1').drop('order') .toPandas().set_index(strata).sort_index()[colname]) values.name = 'mode' return values @inccol def entropy(self, colnames): colnames = ensure_list(colnames) check_columns(self._df, colnames) sdf = self._df.notHandy() n = sdf.count() entropy = [] for colname in colnames: if colname in self._categorical: res = (self._df .groupby(self.strata_colnames + [colname]) .agg(F.count('*').alias('value_counts')).withColumn('probability', F.col('value_counts') / n) .groupby(self.strata_colnames) .agg(F.sum(F.expr('-log2(probability) * probability')).alias(colname)) .safety_off() .cols[self.strata_colnames + [colname]][:]) if len(self.strata_colnames): res.set_index(self.strata_colnames, inplace=True) res = res.unstack() else: res = res[colname] res.index = [colname] else: res = pd.Series(None, index=[colname]) res.name = 'entropy' entropy.append(res) return pd.concat(entropy).sort_index() @inccol def mutual_info(self, colnames): def distribution(sdf, colnames): return sdf.groupby(colnames).agg(F.count('*').alias('__count')) check_columns(self._df, colnames) n = len(colnames) probs = [] sdf = self._df.notHandy() for i in range(n): probs.append(distribution(sdf, 
self.strata_colnames + [colnames[i]])) if len(self.strata_colnames): nrows = sdf.groupby(self.strata_colnames).agg(F.count('*').alias('__n')) else: nrows = sdf.count() entropies = self.entropy(colnames) res = [] for i in range(n): for j in range(i, n): if i == j: mi = pd.Series(entropies[colnames[i]], name='mi').to_frame() else: tdf = distribution(sdf, self.strata_colnames + [colnames[i], colnames[j]]) if len(self.strata_colnames): tdf = tdf.join(nrows, on=self.strata_colnames) else: tdf = tdf.withColumn('__n', F.lit(nrows)) tdf = tdf.join(probs[i].toDF(*self.strata_colnames, colnames[i], '__count0'), on=self.strata_colnames + [colnames[i]]) tdf = tdf.join(probs[j].toDF(*self.strata_colnames, colnames[j], '__count1'), on=self.strata_colnames + [colnames[j]]) mi = (tdf .groupby(self.strata_colnames) .agg(F.sum(F.expr('log2(__count * __n / (__count0 * __count1)) * __count / __n')).alias('mi')) .toPandas()) if len(self.strata_colnames): mi.set_index(self.strata_colnames, inplace=True) res.append(mi.assign(ci=colnames[j], cj=colnames[i])) res.append(mi.assign(ci=colnames[i], cj=colnames[j])) res = pd.concat(res).set_index(['ci', 'cj'], append=len(self.strata_colnames)).sort_index() res = pd.pivot_table(res, index=self.strata_colnames + ['ci'], columns=['cj']) res.index.names = self.strata_colnames + [''] res.columns = res.columns.droplevel(0).rename('') return res @agg def mean(self, colnames): return self._agg('mean', F.mean, colnames) @agg def min(self, colnames): return self._agg('min', F.min, colnames) @agg def max(self, colnames): return self._agg('max', F.max, colnames) @agg def percentile(self, colnames, perc=50, precision=.01): def func(c): return F.expr('approx_percentile({}, {}, {})'.format(c, perc/100., 1./precision)) try: name = {25: 'q1', 50: 'median', 75: 'q3'}[perc] except KeyError: name = 'percentile_{}'.format(perc) return self._agg(name, func, colnames) @agg def median(self, colnames, precision=.01): return self.percentile(colnames, 50, precision) 
@agg def stddev(self, colnames): return self._agg('stddev', F.stddev, colnames) @agg def var(self, colnames): return self._agg('var', F.stddev, colnames) ** 2 @agg def q1(self, colnames, precision=.01): return self.percentile(colnames, 25, precision) @agg def q3(self, colnames, precision=.01): return self.percentile(colnames, 75, precision) ### Boxplot functions def _strat_boxplot(self, colnames, **kwargs): n_rows = n_cols = 1 kwds = deepcopy(kwargs) for kw in ['showfliers', 'precision']: try: del kwds[kw] except KeyError: pass if isinstance(colnames, (tuple, list)) and (len(colnames) > 1): n_rows = self._n_rows n_cols = self._n_cols self._build_strat_plot(n_rows, n_cols, **kwds) return None @inccol def boxplot(self, colnames, ax=None, showfliers=True, k=1.5, precision=.01, **kwargs): colnames = ensure_list(colnames) check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] assert len(colnames), "Only numerical columns can be plot!" return boxplot(self._df, colnames, ax, showfliers, k, precision) def _post_boxplot(self, res): return post_boxplot(self._strata_plot[1], res) ### Scatterplot functions def _strat_scatterplot(self, colnames, **kwargs): self._build_strat_plot(self._n_rows, self._n_cols, **kwargs) return strat_scatterplot(self._df.notHandy(), colnames[0], colnames[1]) @inccol def scatterplot(self, colnames, ax=None, **kwargs): assert len(colnames) == 2, "There must be two columns to plot!" check_columns(self._df, colnames) colnames = [col for col in colnames if col in self._numerical] assert len(colnames) == 2, "Both columns must be numerical!" 
return scatterplot(self._df, colnames[0], colnames[1], ax=ax) ### Histogram functions def _strat_hist(self, colname, bins=10, **kwargs): self._build_strat_plot(self._n_rows, self._n_cols, **kwargs) categorical = True if colname in self._continuous: categorical = False #res = strat_histogram(self._df.notHandy(), colname, bins, categorical) res = strat_histogram(self._df, colname, bins, categorical) self._strata_plot[0].suptitle('') plt.tight_layout() return res @inccol def hist(self, colname, bins=10, ax=None, **kwargs): # TO DO # include split per response/columns assert len(ensure_list(colname)) == 1, "Only single columns can be plot!" check_columns(self._df, colname) if colname in self._continuous: return histogram(self._df, colname, bins=bins, categorical=False, ax=ax) else: return histogram(self._df, colname, bins=bins, categorical=True, ax=ax) class HandyGrouped(GroupedData): def __init__(self, jgd, df, *args): self._jgd = jgd self._df = df self.sql_ctx = df.sql_ctx self._cols = args def agg(self, *exprs): df = super().agg(*exprs) handy = deepcopy(self._df._handy) handy._group_cols = self._cols return HandyFrame(df, handy) def __repr__(self): return "HandyGrouped[%s]" % (", ".join("%s" % c for c in self._group_cols)) class HandyFrame(DataFrame): """HandySpark version of DataFrame. 
Attributes ---------- cols: HandyColumns class to access pandas-like column based methods implemented in Spark pandas: HandyPandas class to access pandas-like column based methods through pandas UDFs transformers: HandyTransformers class to generate Handy transformers stages: integer number of stages in the execution plan response: string name of the response column is_classification: boolean True if response is a categorical variable classes: list list of classes for a classification problem nclasses: integer number of classes for a classification problem ncols: integer number of columns of the HandyFrame nrows: integer number of rows of the HandyFrame shape: tuple tuple representing dimensionality of the HandyFrame statistics_: dict imputation fill value for each feature If stratified, first level keys are filter clauses for stratification fences_: dict fence values for each feature If stratified, first level keys are filter clauses for stratification is_stratified: boolean True if HandyFrame was stratified values: ndarray Numpy representation of HandyFrame. 
Available methods: - notHandy: makes it a plain Spark dataframe - stratify: used to perform stratified operations - isnull: checks for missing values - fill: fills missing values - outliers: returns counts of outliers, columnwise, using Tukey's method - get_outliers: returns list of outliers using Mahalanobis distance - remove_outliers: filters out outliers using Mahalanobis distance - fence: fences outliers - set_safety_limit: defines new safety limit for collect operations - safety_off: disables safety limit for a single operation - assign: appends a new columns based on an expression - nunique: returns number of unique values in each column - set_response: sets column to be used as response / label - disassemble: turns a vector / array column into multiple columns - to_metrics_RDD: turns probability and label columns into a tuple RDD """ def __init__(self, df, handy=None): super().__init__(df._jdf, df.sql_ctx) if handy is None: handy = Handy(self) else: handy = deepcopy(handy) handy._df = self handy._update_types() self._handy = handy self._safety = self._handy._safety self._safety_limit = self._handy._safety_limit self.__overriden = ['collect', 'take'] self._strat_handy = None self._strat_index = None def __getattribute__(self, name): attr = object.__getattribute__(self, name) if hasattr(attr, '__call__') and name not in self.__overriden: def wrapper(*args, **kwargs): try: res = attr(*args, **kwargs) except HandyException as e: raise HandyException(str(e), summary=False) except Exception as e: raise HandyException(str(e), summary=True) if name != 'notHandy': if not isinstance(res, HandyFrame): if isinstance(res, DataFrame): res = HandyFrame(res, self._handy) if isinstance(res, GroupedData): res = HandyGrouped(res._jgd, res._df, *args) return res return wrapper else: return attr def __repr__(self): return "HandyFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes)) def _get_strata(self): plot = None object = None if self._strat_handy is not None: try: 
object = self._strat_handy._strata_object except AttributeError: pass if object is None: object = True try: plots = self._strat_handy._strata_plot[1] #if len(plots) > 1: # plot = plots[self._strat_index] plot = plots except (AttributeError, IndexError): pass return plot, object def _gen_row_ids(self, *args): # EXPERIMENTAL - DO NOT USE! return (self .sort(*args) .withColumn('_miid', F.monotonically_increasing_id()) .withColumn('_row_id', F.row_number().over(Window().orderBy(F.col('_miid')))) .drop('_miid')) def _loc(self, lower_bound, upper_bound): # EXPERIMENTAL - DO NOT USE! assert '_row_id' in self.columns, "Cannot use LOC without generating `row_id`s first!" clause = F.col('_row_id').between(lower_bound, upper_bound) return self.filter(clause) @property def cols(self): """Returns a class to access pandas-like column based methods implemented in Spark Available methods: - min - max - median - q1 - q3 - stddev - value_counts - mode - corr - nunique - hist - boxplot - scatterplot """ return HandyColumns(self, self._handy) @property def pandas(self): """Returns a class to access pandas-like column based methods through pandas UDFs Available methods: - betweeen / between_time - isin - isna / isnull - notna / notnull - abs - clip / clip_lower / clip_upper - replace - round / truncate - tz_convert / tz_localize """ return HandyPandas(self) @property def transformers(self): """Returns a class to generate Handy transformers Available transformers: - HandyImputer - HandyFencer """ return HandyTransformers(self) @property def stages(self): """Returns the number of stages in the execution plan. """ return self._handy.stages @property def response(self): """Returns the name of the response column. """ return self._handy.response @property def is_classification(self): """Returns True if response is a categorical variable. """ return self._handy.is_classification @property def classes(self): """Returns list of classes for a classification problem. 
""" return self._handy.classes @property def nclasses(self): """Returns the number of classes for a classification problem. """ return self._handy.nclasses @property def ncols(self): """Returns the number of columns of the HandyFrame. """ return self._handy.ncols @property def nrows(self): """Returns the number of rows of the HandyFrame. """ return self._handy.nrows @property def shape(self): """Return a tuple representing the dimensionality of the HandyFrame. """ return self._handy.shape @property def statistics_(self): """Returns dictionary with imputation fill value for each feature. If stratified, first level keys are filter clauses for stratification. """ return self._handy.statistics_ @property def fences_(self): """Returns dictionary with fence values for each feature. If stratified, first level keys are filter clauses for stratification. """ return self._handy.fences_ @property def values(self): """Numpy representation of HandyFrame. """ # safety limit will kick in, unless explicitly off before tdf = self if self._safety: tdf = tdf.limit(self._safety_limit) return np.array(tdf.rdd.map(tuple).collect()) def notHandy(self): """Converts HandyFrame back into Spark's DataFrame """ return DataFrame(self._jdf, self.sql_ctx) def set_safety_limit(self, limit): """Sets safety limit used for ``collect`` method. """ self._handy._safety_limit = limit self._safety_limit = limit def safety_off(self): """Disables safety limit for a single call of ``collect`` method. """ self._handy._safety = False self._safety = False return self def collect(self): """Returns all the records as a list of :class:`Row`. By default, its output is limited by the safety limit. To get original `collect` behavior, call ``safety_off`` method first. 
""" try: if self._safety: print('\nINFO: Safety is ON - returning up to {} instances.'.format(self._safety_limit)) return super().limit(self._safety_limit).collect() else: res = super().collect() self._safety = True return res except HandyException as e: raise HandyException(str(e), summary=False) except Exception as e: raise HandyException(str(e), summary=True) def take(self, num): """Returns the first ``num`` rows as a :class:`list` of :class:`Row`. """ self._handy._safety = False res = super().take(num) self._handy._safety = True return res def stratify(self, strata): """Stratify the HandyFrame. Stratified operations should be more efficient than group by operations, as they rely on three iterative steps, namely: filtering the underlying HandyFrame, performing the operation and aggregating the results. """ strata = ensure_list(strata) check_columns(self, strata) return self._handy._stratify(strata) def transform(self, f, name=None, args=None, returnType=None): """INTERNAL USE """ return HandyTransform.transform(self, f, name=name, args=args, returnType=returnType) def apply(self, f, name=None, args=None, returnType=None): """INTERNAL USE """ return HandyTransform.apply(self, f, name=name, args=args, returnType=returnType) def assign(self, **kwargs): """Assign new columns to a HandyFrame, returning a new object (a copy) with all the original columns in addition to the new ones. Parameters ---------- kwargs : keyword, value pairs keywords are the column names. If the values are callable, they are computed on the DataFrame and assigned to the new columns. If the values are not callable, (e.g. a scalar, or string), they are simply assigned. Returns ------- df : HandyFrame A new HandyFrame with the new columns in addition to all the existing columns. """ return HandyTransform.assign(self, **kwargs) @agg def isnull(self, ratio=False): """Returns array with counts of missing value for each column in the HandyFrame. 
Parameters ---------- ratio: boolean, default False If True, returns ratios instead of absolute counts. Returns ------- counts: Series """ return self._handy.isnull(ratio) @agg def nunique(self): """Return Series with number of distinct observations for all columns. Parameters ---------- exact: boolean, optional If True, computes exact number of unique values, otherwise uses an approximation. Returns ------- nunique: Series """ return self._handy.nunique(self.columns) #, exact) @inccol def outliers(self, ratio=False, method='tukey', **kwargs): """Return Series with number of outlier observations according to the specified method for all columns. Parameters ---------- ratio: boolean, optional If True, returns proportion instead of counts. Default is True. method: string, optional Method used to detect outliers. Currently, only Tukey's method is supported. Default is tukey. Returns ------- outliers: Series """ return self._handy.outliers(self.columns, ratio=ratio, method=method, **kwargs) def get_outliers(self, colnames=None, critical_value=.999): """Returns HandyFrame containing all rows deemed as outliers using Mahalanobis distance and informed critical value. Parameters ---------- colnames: list of str, optional List of columns to be used for computing Mahalanobis distance. Default includes all numerical columns critical_value: float, optional Critical value for chi-squared distribution to classify outliers according to Mahalanobis distance. Default is .999 (99.9%). """ return self._handy.get_outliers(colnames, critical_value) def remove_outliers(self, colnames=None, critical_value=.999): """Returns HandyFrame containing only rows NOT deemed as outliers using Mahalanobis distance and informed critical value. Parameters ---------- colnames: list of str, optional List of columns to be used for computing Mahalanobis distance. 
Default includes all numerical columns critical_value: float, optional Critical value for chi-squared distribution to classify outliers according to Mahalanobis distance. Default is .999 (99.9%). """ return self._handy.remove_outliers(colnames, critical_value) def set_response(self, colname): """Sets column to be used as response in supervised learning algorithms. Parameters ---------- colname: string Returns ------- self """ check_columns(self, colname) return self._handy.set_response(colname) @inccol def fill(self, *args, categorical=None, continuous=None, strategy=None): """Fill NA/NaN values using the specified methods. The values used for imputation are kept in ``statistics_`` property and can later be used to generate a corresponding HandyImputer transformer. Parameters ---------- categorical: 'all' or list of string, optional List of categorical columns. These columns are filled with its coresponding modes (most common values). continuous: 'all' or list of string, optional List of continuous value columns. By default, these columns are filled with its corresponding means. If a same-sized list is provided in the ``strategy`` argument, it uses the corresponding straegy for each column. strategy: list of string, optional If informed, it must contain a strategy - either ``mean`` or ``median`` - for each one of the continuous columns. Returns ------- df : HandyFrame A new HandyFrame with filled missing values. """ return self._handy.fill(*args, continuous=continuous, categorical=categorical, strategy=strategy) @inccol def fence(self, colnames, k=1.5): """Caps outliers using lower and upper fences given by Tukey's method, using 1.5 times the interquartile range (IQR). The fence values used for capping outliers are kept in ``fences_`` property and can later be used to generate a corresponding HandyFencer transformer. 
For more information, check: https://en.wikipedia.org/wiki/Outlier#Tukey's_fences Parameters ---------- colnames: list of string Column names to apply fencing. k: float, optional Constant multiplier for the IQR. Default is 1.5 (corresponding to Tukey's outlier, use 3 for "far out" values) Returns ------- df : HandyFrame A new HandyFrame with capped outliers. """ return self._handy.fence(colnames, k=k) def disassemble(self, colname, new_colnames=None): """Disassembles a Vector or Array column into multiple columns. Parameters ---------- colname: string Column containing Vector or Array elements. new_colnames: list of string, optional Default is None, column names are generated using a sequentially generated suffix (e.g., _0, _1, etc.) for ``colname``. If informed, it must have as many column names as elements in the shortest vector/array of ``colname``. Returns ------- df : HandyFrame A new HandyFrame with the new disassembled columns in addition to all the existing columns. """ return self._handy.disassemble(colname, new_colnames) def to_metrics_RDD(self, prob_col='probability', label_col='label'): """Converts a DataFrame containing predicted probabilities and classification labels into a RDD suited for use with ``BinaryClassificationMetrics`` object. Parameters ---------- prob_col: string, optional Column containing Vectors of probabilities. Default is 'probability'. label_col: string, optional Column containing labels. Default is 'label'. Returns ------- rdd: RDD RDD of tuples (probability, label) """ return self._handy.to_metrics_RDD(prob_col, label_col) class Bucket(object): """Bucketizes a column of continuous values into equal sized bins to perform stratification. Parameters ---------- colname: string Column containing continuous values bins: integer Number of equal sized bins to map original values to. Returns ------- bucket: Bucket Bucket object to be used as column in stratification. 
""" def __init__(self, colname, bins=5): self._colname = colname self._bins = bins self._buckets = None self._clauses = None def __repr__(self): return 'Bucket_{}_{}'.format(self._colname, self._bins) @property def colname(self): return self._colname def _get_buckets(self, df): check_columns(df, self._colname) buckets = ([-float('inf')] + np.linspace(*df.agg(F.min(self._colname), F.max(self._colname)).rdd.map(tuple).collect()[0], self._bins + 1).tolist() + [float('inf')]) buckets[-2] += 1e-7 self._buckets = buckets return buckets def _get_clauses(self, buckets): clauses = [] clauses.append('{} < {:.4f}'.format(self._colname, buckets[1])) for b, e in zip(buckets[1:-2], buckets[2:-1]): clauses.append('{} >= {:.4f} and {} < {:.4f}'.format(self._colname, b, self._colname, e)) clauses[-1] = clauses[-1].replace('<', '<=') clauses.append('{} > {:.4f}'.format(self._colname, buckets[-2])) self._clauses = clauses return clauses class Quantile(Bucket): """Bucketizes a column of continuous values into quantiles to perform stratification. Parameters ---------- colname: string Column containing continuous values bins: integer Number of quantiles to map original values to. Returns ------- quantile: Quantile Quantile object to be used as column in stratification. """ def __repr__(self): return 'Quantile{}_{}'.format(self._colname, self._bins) def _get_buckets(self, df): buckets = ([-float('inf')] + df.approxQuantile(col=self._colname, probabilities=np.linspace(0, 1, self._bins + 1).tolist(), relativeError=0.01) + [float('inf')]) buckets[-2] += 1e-7 return buckets class HandyColumns(object): """HandyColumn(s) in a HandyFrame. 
Attributes ---------- numerical: list of string List of numerical columns (integer, float, double) categorical: list of string List of categorical columns (string, integer) continuous: list of string List of continous columns (float, double) string: list of string List of string columns (string) array: list of string List of array columns (array, map) """ def __init__(self, df, handy, strata=None): self._df = df self._handy = handy self._strata = strata self._colnames = None self.COLTYPES = {'continuous': self.continuous, 'categorical': self.categorical, 'numerical': self.numerical, 'string': self.string, 'array': self.array} def __getitem__(self, *args): if isinstance(args[0], tuple): args = args[0] item = args[0] if self._strata is None: if self._colnames is None: if item == slice(None, None, None): item = self._df.columns if isinstance(item, str): try: # try it as an alias item = self.COLTYPES[item] except KeyError: pass check_columns(self._df, item) self._colnames = item if isinstance(self._colnames, int): idx = self._colnames + (len(self._handy._group_cols) if self._handy._group_cols is not None else 0) assert idx < len(self._df.columns), "Invalid column index {}".format(idx) self._colnames = list(self._df.columns)[idx] return self else: try: n = item.stop if n is None: n = -1 except: n = 20 if isinstance(self._colnames, (tuple, list)): res = self._df.notHandy().select(self._colnames) if n == -1: if self._df._safety: print('\nINFO: Safety is ON - returning up to {} instances.'.format(self._df._safety_limit)) n = self._df._safety_limit if n != -1: res = res.limit(n) res = res.toPandas() self._handy._safety = True self._df._safety = True return res else: return self._handy.__getitem__(self._colnames, n) else: if self._colnames is None: if item == slice(None, None, None): item = self._df.columns if isinstance(item, str): try: # try it as an alias item = self.COLTYPES[item] except KeyError: pass self._strata._handycolumns = item return self._strata def 
__repr__(self): colnames = ensure_list(self._colnames) return "HandyColumns[%s]" % (", ".join("%s" % str(c) for c in colnames)) @property def numerical(self): """Returns list of numerical columns in the HandyFrame. """ return self._handy._numerical @property def categorical(self): """Returns list of categorical columns in the HandyFrame. """ return self._handy._categorical @property def continuous(self): """Returns list of continuous columns in the HandyFrame. """ return self._handy._continuous @property def string(self): """Returns list of string columns in the HandyFrame. """ return self._handy._string @property def array(self): """Returns list of array or map columns in the HandyFrame. """ return self._handy._array def mean(self): return self._handy.mean(self._colnames) def min(self): return self._handy.min(self._colnames) def max(self): return self._handy.max(self._colnames) def median(self, precision=.01): """Returns approximate median with given precision. Parameters ---------- precision: float, optional Default is 0.01 """ return self._handy.median(self._colnames, precision) def stddev(self): return self._handy.stddev(self._colnames) def var(self): return self._handy.var(self._colnames) def percentile(self, perc, precision=.01): """Returns approximate percentile with given precision. Parameters ---------- perc: integer Percentile to be computed precision: float, optional Default is 0.01 """ return self._handy.percentile(self._colnames, perc, precision) def q1(self, precision=.01): """Returns approximate first quartile with given precision. Parameters ---------- precision: float, optional Default is 0.01 """ return self._handy.q1(self._colnames, precision) def q3(self, precision=.01): """Returns approximate third quartile with given precision. 
Parameters ---------- precision: float, optional Default is 0.01 """ return self._handy.q3(self._colnames, precision) def _value_counts(self, dropna=True, raw=True): assert len(ensure_list(self._colnames)) == 1, "A single column must be selected!" return self._handy._value_counts(self._colnames, dropna, raw) def value_counts(self, dropna=True): """Returns object containing counts of unique values. The resulting object will be in descending order so that the first element is the most frequently-occurring element. Excludes NA values by default. Parameters ---------- dropna : boolean, default True Don't include counts of missing values. Returns ------- counts: Series """ assert len(ensure_list(self._colnames)) == 1, "A single column must be selected!" return self._handy.value_counts(self._colnames, dropna) def entropy(self): """Returns object containing entropy (base 2) of each column. Returns ------- entropy: Series """ return self._handy.entropy(self._colnames) def mutual_info(self): """Returns object containing matrix of mutual information between every pair of columns. Returns ------- mutual_info: pd.DataFrame """ return self._handy.mutual_info(self._colnames) def mode(self): """Returns same-type modal (most common) value for each column. Returns ------- mode: Series """ colnames = ensure_list(self._colnames) modes = [self._handy.mode(colname) for colname in colnames] if len(colnames) == 1: return modes[0] else: return pd.concat(modes, axis=0) def corr(self, method='pearson'): """Compute pairwise correlation of columns, excluding NA/null values. Parameters ---------- method : {'pearson', 'spearman'} * pearson : standard correlation coefficient * spearman : Spearman rank correlation Returns ------- y : DataFrame """ colnames = [col for col in self._colnames if col in self.numerical] return self._handy.corr(colnames, method=method) def nunique(self): """Return Series with number of distinct observations for specified columns. 
Parameters ---------- exact: boolean, optional If True, computes exact number of unique values, otherwise uses an approximation. Returns ------- nunique: Series """ return self._handy.nunique(self._colnames) #, exact) def outliers(self, ratio=False, method='tukey', **kwargs): """Return Series with number of outlier observations according to the specified method for all columns. Parameters ---------- ratio: boolean, optional If True, returns proportion instead of counts. Default is True. method: string, optional Method used to detect outliers. Currently, only Tukey's method is supported. Default is tukey. Returns ------- outliers: Series """ return self._handy.outliers(self._colnames, ratio=ratio, method=method, **kwargs) def get_outliers(self, critical_value=.999): """Returns HandyFrame containing all rows deemed as outliers using Mahalanobis distance and informed critical value. Parameters ---------- critical_value: float, optional Critical value for chi-squared distribution to classify outliers according to Mahalanobis distance. Default is .999 (99.9%). """ return self._handy.get_outliers(self._colnames, critical_value) def remove_outliers(self, critical_value=.999): """Returns HandyFrame containing only rows NOT deemed as outliers using Mahalanobis distance and informed critical value. Parameters ---------- critical_value: float, optional Critical value for chi-squared distribution to classify outliers according to Mahalanobis distance. Default is .999 (99.9%). """ return self._handy.remove_outliers(self._colnames, critical_value) def hist(self, bins=10, ax=None): """Draws histogram of the HandyFrame's column using matplotlib / pylab. Parameters ---------- bins : integer, default 10 Number of histogram bins to be used ax : matplotlib axes object, default None """ return self._handy.hist(self._colnames, bins, ax) def boxplot(self, ax=None, showfliers=True, k=1.5, precision=.01): """Makes a box plot from HandyFrame column. 
Parameters ---------- ax : matplotlib axes object, default None showfliers : bool, optional (True) Show the outliers beyond the caps. k: float, optional Constant multiplier for the IQR. Default is 1.5 (corresponding to Tukey's outlier, use 3 for "far out" values) """ return self._handy.boxplot(self._colnames, ax, showfliers, k, precision) def scatterplot(self, ax=None): """Makes a scatter plot of two HandyFrame columns. Parameters ---------- ax : matplotlib axes object, default None """ return self._handy.scatterplot(self._colnames, ax) class HandyStrata(object): __handy_methods = (list(filter(lambda n: n[0] != '_', (map(itemgetter(0), inspect.getmembers(HandyFrame, predicate=inspect.isfunction) + inspect.getmembers(HandyColumns, predicate=inspect.isfunction)))))) + ['handy'] def __init__(self, handy, strata): self._handy = handy self._df = handy._df self._strata = strata self._col_clauses = [] self._colnames = [] self._temp_colnames = [] temp_df = self._df temp_df._handy = self._handy for col in self._strata: clauses = [] colname = str(col) self._colnames.append(colname) if isinstance(col, Bucket): self._temp_colnames.append(colname) buckets = col._get_buckets(self._df) clauses = col._get_clauses(buckets) bucketizer = Bucketizer(splits=buckets, inputCol=col.colname, outputCol=colname) temp_df = HandyFrame(bucketizer.transform(temp_df), self._handy) self._col_clauses.append(clauses) self._df = temp_df self._handy._df = temp_df self._df._handy = self._handy value_counts = self._df._handy._value_counts(self._colnames, raw=True).reset_index() self._raw_combinations = sorted(list(map(tuple, zip(*[value_counts[colname].values for colname in self._colnames])))) self._raw_clauses = [' and '.join('{} == {}'.format(str(col), value) if isinstance(col, Bucket) else '{} == "{}"'.format(str(col), value[0] if isinstance(value, tuple) else value) for col, value in zip(self._strata, comb)) for comb in self._raw_combinations] self._combinations = [tuple(value if not len(clauses) 
else clauses[int(float(value))] for value, clauses in zip(comb, self._col_clauses)) for comb in self._raw_combinations] self._clauses = [' and '.join(value if isinstance(col, Bucket) else '{} == "{}"'.format(str(col), value[0] if isinstance(value, tuple) else value) for col, value in zip(self._strata, comb)) for comb in self._combinations] self._strat_df = [self._df.filter(clause) for clause in self._clauses] self._df._strat_handy = self._handy # Shares the same HANDY object among all sub dataframes for i, df in enumerate(self._strat_df): df._strat_index = i df._strat_handy = self._handy self._imputed_values = {} self._handycolumns = None def __repr__(self): repr = "HandyStrata[%s]" % (", ".join("%s" % str(c) for c in self._strata)) if self._handycolumns is not None: colnames = ensure_list(self._handycolumns) repr = "HandyColumns[%s] by %s" % (", ".join("%s" % str(c) for c in colnames), repr) return repr def __getattribute__(self, name): try: if name == 'cols': return HandyColumns(self._df, self._handy, self) else: attr = object.__getattribute__(self, name) return attr except AttributeError as e: if name in self.__handy_methods: def wrapper(*args, **kwargs): raised = True try: # Makes stratification for df in self._strat_df: df._handy._strata = self._strata self._handy._set_stratification(self._strata, self._raw_combinations, self._raw_clauses, self._combinations, self._clauses) if self._handycolumns is not None: args = (self._handycolumns,) + args try: attr_strata = getattr(self._handy, '_strat_{}'.format(name)) self._handy._strata_object = attr_strata(*args, **kwargs) except AttributeError: pass try: if self._handycolumns is not None: f = object.__getattribute__(self._handy, name) else: f = object.__getattribute__(self._df, name) is_agg = getattr(f, '__is_agg', False) is_inccol = getattr(f, '__is_inccol', False) except AttributeError: is_agg = False is_inccol = False if is_agg or is_inccol: if self._handycolumns is not None: colnames = ensure_list(args[0]) else: 
colnames = self._df.columns res = getattr(self._handy, name)(*args, **kwargs) else: if self._handycolumns is not None: res = [getattr(df._handy, name)(*args, **kwargs) for df in self._strat_df] else: res = [getattr(df, name)(*args, **kwargs) for df in self._strat_df] if isinstance(res, pd.DataFrame): if len(self._handy.strata_colnames): res = res.set_index(self._handy.strata_colnames).sort_index() if is_agg: if len(colnames) == 1: res = res[colnames[0]] try: attr_post = getattr(self._handy, '_post_{}'.format(name)) res = attr_post(res) except AttributeError: pass strata = list(map(lambda v: v[1].to_dict(OrderedDict), self._handy.strata.iterrows())) strata_cols = [c if isinstance(c, str) else c.colname for c in self._strata] if isinstance(res, list): if isinstance(res[0], DataFrame): joined_df = res[0] self._imputed_values = joined_df.statistics_ self._fenced_values = joined_df.fences_ if len(res) > 1: if len(joined_df.statistics_): self._imputed_values = {self._clauses[0]: joined_df.statistics_} if len(joined_df.fences_): self._fenced_values = {self._clauses[0]: joined_df.fences_} for strat_df, clause in zip(res[1:], self._clauses[1:]): if len(joined_df.statistics_): self._imputed_values.update({clause: strat_df.statistics_}) if len(joined_df.fences_): self._fenced_values.update({clause: strat_df.fences_}) joined_df = joined_df.unionAll(strat_df) # Clears stratification self._handy._clear_stratification() self._df._strat_handy = None self._df._strat_index = None if len(self._temp_colnames): joined_df = joined_df.drop(*self._temp_colnames) res = HandyFrame(joined_df, self._handy) res._handy._imputed_values = self._imputed_values res._handy._fenced_values = self._fenced_values elif isinstance(res[0], pd.DataFrame): strat_res = [] indexes = res[0].index.names if indexes[0] is None: indexes = ['index'] for r, s in zip(res, strata): strata_dict = dict([(k if isinstance(k, str) else k.colname, v) for k, v in s.items()]) strat_res.append(r.assign(**strata_dict) 
.reset_index()) res = (pd.concat(strat_res) .sort_values(by=strata_cols) .set_index(strata_cols + indexes) .sort_index()) elif isinstance(res[0], pd.Series): # TODO: TEST strat_res = [] for r, s in zip(res, strata): strata_dict = dict([(k if isinstance(k, str) else k.colname, v) for k, v in s.items()]) series_name = none2default(r.name, 0) if series_name == name: series_name = 'index' strat_res.append(r.reset_index() .rename(columns={series_name: name, 'index': series_name}) .assign(**strata_dict) .set_index(strata_cols + [series_name])[name]) res = pd.concat(strat_res).sort_index() if len(ensure_list(self._handycolumns)) > 1: try: res = res.astype(np.float64) res = res.to_frame().reset_index().pivot_table(values=name, index=strata_cols, columns=series_name) res.columns.name = '' except ValueError: pass elif isinstance(res[0], np.ndarray): # TODO: TEST strat_res = [] for r, s in zip(res, strata): strata_dict = dict([(k if isinstance(k, str) else k.colname, v) for k, v in s.items()]) strat_res.append(pd.DataFrame(r, columns=[name]) .assign(**strata_dict) .set_index(strata_cols)[name]) res = pd.concat(strat_res).sort_index() elif isinstance(res[0], Axes): res, axs = self._handy._strata_plot res = consolidate_plots(res, axs, args[0], self._clauses) elif isinstance(res[0], list): joined_list = res[0] for l in res[1:]: joined_list += l return joined_list elif len(res) == len(self._combinations): # TODO: TEST strata_df = pd.DataFrame(strata) strata_df.columns = strata_cols res = (pd.concat([pd.DataFrame(res, columns=[name]), strata_df], axis=1) .set_index(strata_cols) .sort_index()) raised = False return res except HandyException as e: raise HandyException(str(e), summary=False) except Exception as e: raise HandyException(str(e), summary=True) finally: if not raised: if isinstance(res, HandyFrame): res._handy._clear_stratification() self._handy._clear_stratification() self._df._strat_handy = None self._df._strat_index = None if len(self._temp_colnames): self._df = 
self._df.drop(*self._temp_colnames) self._handy._df = self._df return wrapper else: raise e ================================================ FILE: handyspark/sql/datetime.py ================================================ from handyspark.sql.transform import HandyTransform import pandas as pd class HandyDatetime(object): __supported = {'boolean': ['is_leap_year', 'is_month_end', 'is_month_start', 'is_quarter_end', 'is_quarter_start', 'is_year_end', 'is_year_start'], 'string': ['strftime', 'tz', 'weekday_name'], 'integer': ['day', 'dayofweek', 'dayofyear', 'days_in_month', 'daysinmonth', 'hour', 'microsecond', 'minute', 'month', 'nanosecond', 'quarter', 'second', 'week', 'weekday', 'weekofyear', 'year'], 'date': ['date'], 'timestamp': ['ceil', 'floor', 'round', 'normalize', 'time', 'tz_convert', 'tz_localize']} __unsupported = ['freq', 'to_period', 'to_pydatetime'] __functions = ['strftime', 'ceil', 'floor', 'round', 'normalize', 'tz_convert', 'tz_localize'] __available = sorted(__supported['boolean'] + __supported['string'] + __supported['integer'] + __supported['date'] + __supported['timestamp']) __types = {n: t for t, v in __supported.items() for n in v} _colname = None def __init__(self, df, colname): self._df = df self._colname = colname if self._df.notHandy().select(colname).dtypes[0][1] != 'timestamp': raise AttributeError('Can only use .dt accessor with datetimelike values') def __getattribute__(self, name): try: attr = object.__getattribute__(self, name) return attr except AttributeError as e: if name in self.__available: if name in self.__functions: def wrapper(*args, **kwargs): return HandyTransform.gen_pandas_udf(f=lambda col: col.dt.__getattribute__(name)(**kwargs), args=(self._colname,), returnType=self.__types.get(name, 'string')) wrapper.__doc__ = getattr(pd.Series.dt, name).__doc__ return wrapper else: func = HandyTransform.gen_pandas_udf(f=lambda col: col.dt.__getattribute__(name), args=(self._colname,), returnType=self.__types.get(name, 
class HandyPandas(object):
    """Gives access to a subset of pandas Series methods through pandas UDFs.

    A column is chosen with `[]` (see `__getitem__`) and any method listed in
    the supported table is then resolved dynamically by `__getattribute__`,
    returning the result of `HandyTransform.gen_pandas_udf`.
    """
    # Maps the return type to the pandas methods producing it;
    # 'same' means the type of the selected column is kept
    __supported = {'boolean': ['between', 'between_time', 'isin', 'isna', 'isnull', 'notna', 'notnull'],
                   'same': ['abs', 'clip', 'clip_lower', 'clip_upper', 'replace', 'round', 'truncate',
                            'tz_convert', 'tz_localize']}
    # Not referenced anywhere else in this class
    __as_series = ['rank', 'interpolate', 'pct_change', 'bfill', 'cummax', 'cummin', 'cumprod', 'cumsum', 'diff',
                   'ffill', 'fillna', 'shift']
    __available = sorted(__supported['boolean'] + __supported['same'])
    __types = {n: t for t, v in __supported.items() for n in v}

    def __init__(self, df):
        self._df = df
        self._colname = None

    def __getitem__(self, *args):
        """Selects the column the following method call will be applied to.

        Raises an AssertionError (from `check_columns`) if the column
        does not exist in the dataframe. Returns the object itself,
        so calls can be chained.
        """
        if isinstance(args[0], tuple):
            args = args[0]
        item = args[0]
        check_columns(self._df, item)
        self._colname = item
        return self

    @property
    def str(self):
        """Returns a class to access pandas-like string column based methods through pandas UDFs

        Available methods:
        - contains
        - startswith / endswith
        - match
        - isalpha / isnumeric / isalnum / isdigit / isdecimal / isspace
        - islower / isupper / istitle
        - replace
        - repeat
        - join
        - pad
        - slice / slice_replace
        - strip / lstrip / rstrip
        - wrap / center / ljust / rjust
        - translate
        - get
        - normalize
        - lower / upper / capitalize / swapcase / title
        - zfill
        - count
        - find / rfind
        - len
        """
        return HandyString(self._df, self._colname)

    @property
    def dt(self):
        """Returns a class to access pandas-like datetime column based methods through pandas UDFs

        Available methods:
        - is_leap_year / is_month_end / is_month_start / is_quarter_end / is_quarter_start / is_year_end / is_year_start
        - strftime
        - tz / time / tz_convert / tz_localize
        - day / dayofweek / dayofyear / days_in_month / daysinmonth
        - hour / microsecond / minute / nanosecond / second
        - week / weekday / weekday_name
        - month / quarter / year / weekofyear
        - date
        - ceil / floor / round
        - normalize
        """
        return HandyDatetime(self._df, self._colname)

    def __getattribute__(self, name):
        """Resolves regular attributes first and supported pandas methods next.

        For a supported pandas method, returns a function that forwards its
        positional and keyword arguments to the pandas method inside a UDF.
        Any other missing name re-raises the original AttributeError.
        """
        try:
            attr = object.__getattribute__(self, name)
            return attr
        except AttributeError as e:
            if name in self.__available:
                def wrapper(*args, **kwargs):
                    returnType = self.__types.get(name, 'string')
                    if returnType == 'same':
                        # Keeps the Spark type of the selected column
                        returnType = self._df.notHandy().select(self._colname).dtypes[0][1]
                    # Positional arguments must be forwarded too, otherwise
                    # calls like `between(0, 10)` reach pandas without arguments
                    return HandyTransform.gen_pandas_udf(f=lambda col: getattr(col, name)(*args, **kwargs),
                                                         args=(self._colname,),
                                                         returnType=returnType)
                if name not in ['str', 'dt']:
                    wrapper.__doc__ = getattr(pd.Series, name).__doc__
                return wrapper
            else:
                raise e
class HandyString(object):
    """Gives access to pandas `Series.str` methods through pandas UDFs.

    Supported method names are resolved dynamically by `__getattribute__`,
    which returns the result of `HandyTransform.gen_pandas_udf` applied to
    the column given at construction time.
    """
    # Maps the Spark return type to the pandas string methods producing it
    __supported = {'boolean': ['contains', 'startswith', 'endswith', 'match', 'isalpha', 'isnumeric', 'isalnum', 'isdigit',
                               'isdecimal', 'isspace', 'islower', 'isupper', 'istitle'],
                   'string': ['replace', 'repeat', 'join', 'pad', 'slice', 'slice_replace', 'strip', 'wrap', 'translate',
                              'get', 'center', 'ljust', 'rjust', 'zfill', 'lstrip', 'rstrip',
                              'normalize', 'lower', 'upper', 'title', 'capitalize', 'swapcase'],
                   'integer': ['count', 'find', 'len', 'rfind']}
    __unsupported = ['cat', 'extract', 'extractall', 'get_dummies', 'findall', 'index', 'split', 'rsplit', 'partition',
                     'rpartition', 'rindex', 'decode', 'encode']
    __available = sorted(__supported['boolean'] + __supported['string'] + __supported['integer'])
    __types = {n: t for t, v in __supported.items() for n in v}
    _colname = None

    def __init__(self, df, colname):
        self._df = df
        self._colname = colname

    @staticmethod
    def _remove_accents(input):
        """Strips accents from a string, dropping any remaining non-ASCII character.

        None is returned unchanged, so missing values do not break the UDF.
        """
        if input is None:
            return None
        # The bytes are pure ASCII at this point; decoding them with
        # 'unicode_escape' would reinterpret backslashes found in the text
        return unicodedata.normalize('NFKD', input).encode('ASCII', 'ignore').decode('ASCII')

    def remove_accents(self):
        """Returns a UDF-based string column with accents removed from every value."""
        return HandyTransform.gen_pandas_udf(f=lambda col: col.apply(HandyString._remove_accents),
                                             args=(self._colname,),
                                             returnType='string')

    def __getattribute__(self, name):
        """Resolves regular attributes first and supported `Series.str` methods next.

        For a supported method, returns a function that forwards its positional
        and keyword arguments to the pandas method inside a UDF.
        Any other missing name re-raises the original AttributeError.
        """
        try:
            attr = object.__getattribute__(self, name)
            return attr
        except AttributeError as e:
            if name in self.__available:
                def wrapper(*args, **kwargs):
                    # Positional arguments must be forwarded too, otherwise
                    # calls like `find('Mr.')` reach pandas without arguments
                    return HandyTransform.gen_pandas_udf(f=lambda col: getattr(col.str, name)(*args, **kwargs),
                                                         args=(self._colname,),
                                                         returnType=self.__types.get(name, 'string'))
                wrapper.__doc__ = getattr(pd.Series.str, name).__doc__
                return wrapper
            else:
                raise e
@staticmethod
def transform(sdf, f, name=None, args=None, returnType=None):
    """Adds a column computed by applying `f` through a UDF.

    Parameters
    ----------
    sdf: dataframe the new column is added to (via `withColumn`)
    f: function, or a `(function, returnType)` tuple; in the latter case
        the tuple's return type replaces the `returnType` argument
    name: name of the new column, optional; defaults to the function name
        followed by its `co_varnames`
    args: column names passed to the function, optional
    returnType: Spark type of the new column, optional; when missing, it is
        taken from `HandyTransform._get_return`

    Returns
    -------
    The result of `sdf.withColumn`.
    """
    # The tuple must be unpacked first: building the default name
    # reads attributes of the function, which a tuple does not have
    if isinstance(f, tuple):
        f, returnType = f
    if name is None:
        name = '{}{}'.format(f.__name__, str(f.__code__.co_varnames).replace("'", ""))
    if returnType is None:
        returnType = HandyTransform._get_return(sdf, f, args)
    return sdf.withColumn(name, HandyTransform.gen_pandas_udf(f, args, returnType))
def StatisticalSummaryValues(sdf, colnames):
    """Builds a Java StatisticalSummaryValues object for each column

    Parameters
    ----------
    sdf: dataframe containing the columns
    colnames: column name or list of column names

    Returns
    -------
    ssvs: dict of column names (keys) and Java objects (values)
    """
    colnames = ensure_list(colnames)
    check_columns(sdf, colnames)

    jvm = sdf._sc._jvm
    java_class = jvm.org.apache.commons.math3.stat.descriptive.StatisticalSummaryValues
    summ = sdf.notHandy().select(colnames).describe().toPandas().set_index('summary')
    ssvs = {}
    for colname in colnames:
        values = list(map(float, summ[colname].values))
        # Rows are read positionally as count, mean, stddev, min, max
        count, mean, stddev, minimum, maximum = values[0], values[1], values[2], values[3], values[4]
        # The Java constructor takes (mean, variance, n, max, min, sum):
        # the variance is the SQUARE of the standard deviation
        ssvs.update({colname: java_class(mean, stddev ** 2, int(count), maximum, minimum, count * mean)})
    return ssvs
def ensure_list(value):
    """Wraps a single value into a list.

    None becomes an empty list; lists and tuples are returned
    untouched (the very same object); anything else is returned
    as a one-element list.
    """
    if isinstance(value, (list, tuple)):
        return value
    return [] if value is None else [value]
def __init__(self, *args, **kwargs):
    """Optionally prints the exception summary when the exception is created.

    `summary` is an optional boolean keyword argument: if it is given and
    truthy, the text built by `HandyException.exception_summary` is printed.
    Callers can pass False to avoid printing the summary again at every
    level while the exception "bubbles up".
    """
    if kwargs.get('summary'):
        print(HandyException.exception_summary())
def get_buckets(rdd, buckets):
    """Extracted from pyspark.rdd.RDD.histogram function

    Computes evenly spaced bucket boundaries between the minimum and
    the maximum values of the RDD. None and float NaN are ignored.

    Parameters
    ----------
    rdd: RDD of values (must support `filter`, `map` and `reduce`)
    buckets: int, number of buckets, must be >= 1

    Returns
    -------
    buckets: list of `buckets + 1` boundaries, or `[min, max]` if there is
        a single bucket or all values are the same

    Raises
    ------
    ValueError: if `buckets` < 1, or the increment is infinite, or a
        TypeError mentioning " empty " was raised while reducing
    TypeError: if the values do not support subtraction / division
    """
    if buckets < 1:
        raise ValueError("number of buckets must be >= 1")

    # filter out non-comparable elements
    def comparable(x):
        if x is None:
            return False
        if type(x) is float and isnan(x):
            return False
        return True

    filtered = rdd.filter(comparable)

    # faster than stats()
    def minmax(a, b):
        return min(a[0], b[0]), max(a[1], b[1])

    try:
        minv, maxv = filtered.map(lambda x: (x, x)).reduce(minmax)
    except TypeError as e:
        if " empty " in str(e):
            raise ValueError("can not generate buckets from empty RDD")
        raise

    if minv == maxv or buckets == 1:
        # Returns boundaries only, like every other path of this function
        # (the original histogram code returned the counts as well)
        return [minv, maxv]

    try:
        inc = (maxv - minv) / buckets
    except TypeError:
        raise TypeError("Can not generate buckets with non-number in RDD")

    if isinf(inc):
        raise ValueError("Can not generate buckets with infinite value")

    # keep them as integer if possible
    inc = int(inc)
    if inc * buckets != maxv - minv:
        inc = (maxv - minv) * 1.0 / buckets

    buckets = [i * inc + minv for i in range(buckets)]
    buckets.append(maxv)  # fix accumulated error
    return buckets
def get_jvm_class(cl):
    """Builds JVM class name from Python class

    The first two characters of the Python module path are dropped
    and the remainder is prefixed with `org.apache.`, followed by
    the class name.
    """
    module_path = cl.__module__[2:]
    return '.'.join(('org.apache', module_path, cl.__name__))
def counts_to_df(value_counts, colnames, n_points):
    """DO NOT USE IT!

    Expands value counts into a pandas DataFrame of roughly `n_points` rows,
    repeating each combination of values proportionally to its count.

    Parameters
    ----------
    value_counts: object with a `to_frame` method (presumably a pandas Series
        of counts - TODO confirm); each index entry must have a `toArray`
        method (presumably a Spark vector - TODO confirm)
    colnames: list of names given to the elements of each index entry
    n_points: approximate total number of rows to generate

    Returns
    -------
    pandas DataFrame with one row per generated point.
    """
    # One dict per row: the count plus one entry per element of the index
    pdf = pd.DataFrame(value_counts
                       .to_frame('count')
                       .reset_index()
                       .apply(lambda row: dict({'count': row['count']},
                                               **dict(zip(colnames, row['index'].toArray()))),
                              axis=1)
                       .values
                       .tolist())
    # Turns counts into proportions of the total...
    pdf['count'] /= pdf['count'].sum()
    # ...and then into multiples of the smallest proportion
    proportions = pdf['count'] / pdf['count'].min()
    # Scaling factor so the number of generated rows is close to n_points
    factor = int(n_points / proportions.sum())
    pdf = pd.concat([pdf[colnames], (proportions * factor).astype(int)], axis=1)
    combinations = pdf.apply(lambda row: row.to_dict(), axis=1).values.tolist()
    # `pop` removes the count before the remaining items are listed,
    # so each combination is repeated `count` times without the count itself
    return pd.DataFrame([dict(v) for c in combinations for v in int(c.pop('count')) * [list(c.items())]])
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# UNCOMMENT THIS IF YOU'RE USING GOOGLE COLAB!\n", "\n", "#import os\n", "#os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n", "#os.environ[\"SPARK_HOME\"] = \"/content/spark-2.3.3-bin-hadoop2.7\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!wget https://raw.githubusercontent.com/dvgodoy/handyspark/master/tests/rawdata/train.csv" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import findspark\n", "import pandas as pd\n", "from pyspark.sql import SparkSession\n", "from pyspark.sql import functions as F\n", "from handyspark import *\n", "from matplotlib import pyplot as plt\n", "# fixes issue with seaborn hiding fliers on boxplot\n", "import matplotlib as mpl\n", "mpl.rc(\"lines\", markeredgewidth=0.5)\n", "\n", "findspark.init()\n", "os.environ['PYSPARK_SUBMIT_ARGS'] = '--master local[*] pyspark-shell'\n", "\n", "%matplotlib inline\n", "\n", "spark = SparkSession.builder.getOrCreate()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Loading Data into a `HandyFrame`\n", "\n", "### After loading data as usual, just call method `toHandy()` (an extension to Spark's dataframe)!" 
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "HandyFrame[PassengerId: int, Survived: int, Pclass: int, Name: string, Sex: string, Age: double, SibSp: int, Parch: int, Ticket: string, Fare: double, Cabin: string, Embarked: string]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sdf = spark.read.csv('train.csv', header=True, inferSchema=True)\n", "hdf = sdf.toHandy()\n", "hdf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fetching some data\n", "\n", "- using an instance of `cols` from your `HandyFrame`, you can retrieve values for given columns in the top N rows" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Single column will be returned as a pandas Series" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 Braund, Mr. Owen Harris\n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n", "2 Heikkinen, Miss. Laina\n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n", "4 Allen, Mr. William Henry\n", "Name: Name, dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.cols['Name'][:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Multiple columns will be returned as a pandas DataFrame" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NamePclass
0Braund, Mr. Owen Harris3
1Cumings, Mrs. John Bradley (Florence Briggs Th...1
2Heikkinen, Miss. Laina3
3Futrelle, Mrs. Jacques Heath (Lily May Peel)1
4Allen, Mr. William Henry3
\n", "
" ], "text/plain": [ " Name Pclass\n", "0 Braund, Mr. Owen Harris 3\n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1\n", "2 Heikkinen, Miss. Laina 3\n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1\n", "4 Allen, Mr. William Henry 3" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.cols[['Name', 'Pclass']][:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### You can also use `:` to get all columns!" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NoneS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NoneS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NoneS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 None S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 None S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 None S " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.cols[:][:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 2. Exploratory Data Analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Checking for missing values" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 0.000000\n", "Survived 0.000000\n", "Pclass 0.000000\n", "Name 0.000000\n", "Sex 0.000000\n", "Age 0.198653\n", "SibSp 0.000000\n", "Parch 0.000000\n", "Ticket 0.000000\n", "Fare 0.000000\n", "Cabin 0.771044\n", "Embarked 0.002245\n", "Name: missing(ratio), dtype: float64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.isnull(ratio=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Let's check the value counts for `Embarked`" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Embarked\n", "C 168\n", "Q 77\n", "S 644\n", "NaN 2\n", "Name: value_counts, dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.cols['Embarked'].value_counts(dropna=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### S (Southampton) is the 
port where most people Embarked... is it like that for every class?\n", "\n", "## 2.1 Enter `stratify`!\n", "### You can now get statistics and more at strata level, without using `groupby`!" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pclass Embarked\n", "1 C 85\n", " Q 2\n", " S 127\n", "2 C 17\n", " Q 3\n", " S 164\n", "3 C 66\n", " Q 72\n", " S 353\n", "Name: value_counts, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.stratify(['Pclass']).cols['Embarked'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### So, indeed, Southampton is the port where most people from all classes embarked!\n", "\n", "### What about age? Is the average age different depending on `Pclass` and `Sex`? You bet!" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pclass Sex \n", "1 female 34.611765\n", " male 41.281386\n", "2 female 28.722973\n", " male 30.740707\n", "3 female 21.750000\n", " male 26.507589\n", "Name: Age, dtype: float64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.stratify(['Pclass', 'Sex']).cols['Age'].mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### \"`stratify` is cool, but can I use it with continuous values too?\" Sure thing!\n", "\n", "## 2.2 Enter `Bucket` and `Quantile`!\n", "\n", "### You can use either of them to split continuous values into bins for the stratification!" 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sex Age Embarked\n", "female Age >= 0.4200 and Age < 40.2100 C 46\n", " Q 12\n", " S 154\n", " Age >= 40.2100 and Age <= 80.0000 C 15\n", " S 32\n", "male Age >= 0.4200 and Age < 40.2100 C 53\n", " Q 11\n", " S 287\n", " Age >= 40.2100 and Age <= 80.0000 C 16\n", " Q 5\n", " S 81\n", "Name: value_counts, dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf.stratify(['Sex', Bucket('Age', 2)]).cols['Embarked'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Let's use `stratify` to perform a `fill` operation" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "hdf_filled = hdf.stratify(['Pclass', 'Sex']).fill(continuous=['Age'], strategy=['mean'])\n", "hdf_filled = hdf_filled.fill(categorical=['Embarked'])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 0\n", "Survived 0\n", "Pclass 0\n", "Name 0\n", "Sex 0\n", "Age 0\n", "SibSp 0\n", "Parch 0\n", "Ticket 0\n", "Fare 0\n", "Cabin 687\n", "Embarked 0\n", "Name: missing, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf_filled.isnull()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ok, no more missing values for `Age` and `Embarked`!\n", "### Which values were actually used?" 
] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Age': {'Pclass == \"1\" and Sex == \"female\"': 34.61176470588235,\n", " 'Pclass == \"1\" and Sex == \"male\"': 41.28138613861386,\n", " 'Pclass == \"2\" and Sex == \"female\"': 28.722972972972972,\n", " 'Pclass == \"2\" and Sex == \"male\"': 30.74070707070707,\n", " 'Pclass == \"3\" and Sex == \"female\"': 21.75,\n", " 'Pclass == \"3\" and Sex == \"male\"': 26.507588932806325},\n", " 'Embarked': 'S'}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf_filled.statistics_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### \"But I want to use those values to fill the test set as well...\"\n", "\n", "## 2.3 Enter the `transformers` generator!\n", "\n", "### You can generate a custom PySpark imputer transformer that will perform the stratified filling and it is serializable as well, so you can include it in your pipeline and save / load it at will!" 
] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Age': {'Pclass == \"1\" and Sex == \"female\"': 34.61176470588235,\n", " 'Pclass == \"1\" and Sex == \"male\"': 41.28138613861386,\n", " 'Pclass == \"2\" and Sex == \"female\"': 28.722972972972972,\n", " 'Pclass == \"2\" and Sex == \"male\"': 30.74070707070707,\n", " 'Pclass == \"3\" and Sex == \"female\"': 21.75,\n", " 'Pclass == \"3\" and Sex == \"male\"': 26.507588932806325},\n", " 'Embarked': 'S'}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "imputer = hdf_filled.transformers.imputer()\n", "imputer.getDictValues()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2.4 Detecting and fencing outliers" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 0\n", "Survived 0\n", "Pclass 0\n", "Age 1\n", "SibSp 12\n", "Parch 213\n", "Fare 53\n", "Name: outliers, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf_filled.outliers(method='tukey', k=3.)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### There seem to be outliers for several columns... 
how can we be certain?\n", "\n", "## Maybe plotting them?\n", "\n", "- Unlike many tutorials on the internet, these plots are generated using Spark's distributed computing capabilities, and ***NOT converting the whole dataset to pandas to plot it*** (which defeats the whole purpose of Spark!)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6gAAAEWCAYAAABrHroEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3XucnVWd5/vPN3KPCgGTkoZAojDehcYyivH0KUEUwSOelgZsxwaGPmlejTaOlyb06DDM4IinsTF2O2JaxNBjizY0ygsil4MpHR1ALkbkok2M0CQNJMpFwtWQ3/ljP4k7MZeqSlXtXbs+79drv/bzrOd59vptdha1f3utZ61UFZIkSZIkddqUTgcgSZIkSRKYoEqSJEmSuoQJqiRJkiSpK5igSpIkSZK6ggmqJEmSJKkrmKBKkiRJkrqCCaqGLMlgkj/tdBxSr0nyviTXtu1XkgM6GZMkSVInmKD2iCT3JnkqyZokDyX5SpLndzouSb+V5M1J/neSx5I8nOQHSV5fVV+tqrcN8TV2SvKZJCua9n5vks+OdeySJEnjwQS1t/xfVfV84BCgH/j4cC5OssOYRCWJJC8ErgT+FtgT2Ac4G3hmmC91Jq32PQd4ATAA3DZqgUqSJHWQCWoPqqqVwLeBVyc5OcndSR5PsjzJn60/L8lA0wtzRpIHgYua8mOSLE3y6yQ/T3Jk28vv3/T6PJ7k2iQvGt93J01Y/w6gqr5WVc9V1VNVdW1V3Z7kpCTf3+T8o5o2+8skf51k/f+vXw9cXlX/Vi33VtXF6y9qelTPTHJXkkeSXJRkl3F6j5IkSdvFBLUHJZkJHAX8CFgFvBN4IXAycH6SQ9pOfzGt3pz9gXlJ5gAXAx8D9gD+ALi37fw/bl5nBrAT8NGxfC9SD/kX4Lkki5K8I8m0bZz/f9PqKT0EOAb4D035jcCHk/x5ktckyWaufR/wduCltBLjYY2mkCRJ6hQT1N7yzSSPAt8Hvgv896q6qqp+3vS0fBe4Fvg/2q5ZB5xVVc9U1VPAKcCXq+q6qlpXVSur6qdt519UVf/SnPsN4ODxeWvSxFZVvwbeDBTw98DqJFck6dvCJZ+uqoer6l+BzwLvbco/BXyaVhJ6C7AyyYmbXPt3VXV/VT0MfLLtWkmSpK5mgtpb3l1Ve1TV/lX151X1VNNTc2MzIcujtHpW24flrq6qp9v2ZwI/30odD7ZtPwk4EZM0RFV1d1WdVFX7Aq8Gfo9W8rk597dt39ecSzM8+PNVNZfWKIdPAl9O8optXStJktTtTFB7WJKdgcuA84C+qtoDWAy0DwmsTS67n9awQEljqBmZ8BVaiermzGzb3g/4t828xlNV9XngEeCVw7lWkiSpG5mg9radgJ2B1cDaJO8AtrWUxYXAyUkOTzIlyT5JXj7WgUq9LsnLk3wkyb7N/kxaQ29v3MIlH0syrTnvdODrzXUfaiY42zXJDs3w3
hfQuud8vdOS7JtkT+A/rb9WkiSp25mg9rCqehz4C1r3ij5Ca4KjK7ZxzQ9pJlMCHqN1L+v+YxupNCk8DrwBuCnJE7QS0zuAj2zh/G8BtwJLgato/XgEraH1n6E13P6XwGnAe6pqedu1/0jrfvPltIbsnzOq70SSJGmMpGrTEZ6SpIkqyb3An1bV/9fpWCRJkobLHlRJkiRJUlcwQZUkSZIkdQWH+EqSJEmSuoI9qJIkSZKkrrBDpwMAeNGLXlSzZs3qdBhSR916662/rKrpnY5jc2yjkm1UW/fEE08wderUTocxqdlGtTW20c4bahvdZoKa5GVsvIbeS4D/DFzclM8C7gWOq6pHkgRYABxFazmEk6rqtq3VMWvWLG655ZZthSL1tCT3dTqGLbGNSrZRbd3g4CADAwOdDmNSs41qa2yjnTfUNrrNIb5V9bOqOriqDgZeRyvpvByYD1xfVQcC1zf7AO8ADmwe84AvDD98SZIkSdJkM9x7UA8Hfl5V9wHHAIua8kXAu5vtY4CLq+VGYI8ke49KtJIkSZKknjXcBPUE4GvNdl9VPdBsPwj0Ndv7APe3XbOiKdtIknlJbklyy+rVq4cZhiRJkiSp1ww5QU2yE/Au4J82PVattWqGtV5NVS2sqv6q6p8+vSvvZ5ckSZIkjaPh9KC+A7itqh5q9h9aP3S3eV7VlK8EZrZdt29TJkmSJEnSFg0nQX0vvx3eC3AFcGKzfSLwrbbyP0nLG4HH2oYCS5KkRpL/mOTOJHck+VqSXZLMTnJTkmVJvt6MYFKXuu+++zj77LO56KKLOPvss7nvvq6dSFaalC677DJmz57N8ccfz+zZs7nssss6HZK2YUjroCaZChwB/Flb8bnAN5KcAtwHHNeUL6a1xMwyWjP+njxq0UqS1COS7AP8BfDKqnoqyTdozfVwFHB+VV2S5ALgFJwRvyvdd999/O3f/i1nn302N998M69//es566yz+OAHP8j+++/f6fCkSe+yyy7jtNNO44YbbuC+++5j//3359BDDwXgPe95T4ej05YMqQe1qp6oqr2q6rG2sl9V1eFVdWBVvbWqHm7Kq6pOq6qXVtVrqspFnyRJ2rwdgF2T7ADsBjwAHAZc2hxvnyVfXeYrX/kKZ599NlOnTgVg6tSpnH322XzlK1/pbGCSAPjoRz/KDTfcwOzZswGYPXs2N9xwAx/96Ec7HJm2Zkg9qJK6W5I9gC8Br6Y1Ydl/AH4GfB2YBdwLHFdVjyQJsIBWL82TwElVdVsHwpYmtapameQ84F+Bp4BrgVuBR6tqbXPaZmfCh9Zs+LTWG6evr4/BwcExj1kbW758OTfffDMAa9as2fAZLF++3M9D6gLr1q3bkJyuN3v2bNatW9ehiDQUXZ+gzpp/1Yiuu/fco0c5EqmrLQCurqpjm/vVdgP+Cri+qs5NMh+YD5xBa8KzA5vHG2gNHXzDSCu2jUojk2QarbXDZwOP0pol/8ihXl9VC4GFAP39/TUwMDAGUWprvvvd7/L617+eqVOnMjg4yMDAAE888QTf/e538fOQOm/KlCn84he/2ChJ/cUvfsGUKcNdaVPjyU9HmuCS7A78AXAhQFU9W1WP0vriu6g5rX2Y4DHAxc1w/BuBPdbPyC1pXL0V+EVVra6q3wD/DMyl1SbX/4DsTPhd7KSTTuKss87iiSeeAOCJJ57grLPO4qSTTupsYBq2JPcm+UmSpUluacr2THJdknua52lNeZJ8rpnI7PYkh3Q2em3Jeeedx6GHHsovfvELoJWcHnrooZx33nkdjkxb0/U9qJK2aTawGrgoyUG0hgieDvS1zaD9INDXbO8D3N92/fohhBvNtt0+fHC//fYbs+ClSexfgTcm2Y3WEN/DgVuAJcCxwCVsPEu+usz+++/PBz/4Qc477zyWL1/Od7/7XSdImtjeUlW/bNufzziMRNLYWT8R0mGHHcaTTz7Jbrvtxuc//3knSOpyJqjSxLcDcAjwwaq6KckCW
n9EN6iqSlLDedFNhw+OVrCSWpr2eilwG7AW+BGtNncVcEmSc5qyCzsXpbZl//3356yzztowxFc95RhgoNleBAzSSlA3jEQCbkyyR5K9XVaxO73nPe/hPe95j210AjFBlSa+FcCKqrqp2b+UVoL60Po/mM0Q3lXN8ZXAzLbrHUIodUhVnQWctUnxcmBOB8KRJrMCrm1+zP1i8yPtqI1EciKzzmufyEzdzQRVmuCq6sEk9yd5WVX9jNYwwbuax4m01ixuHyZ4BfCBJJfQGpL0mL/6SpImuTc3M2vPAK5L8tP2g6MxEsneu86yB3XiMEGVesMHga82M/guB06mNQnaN5KcAtwHHNecu5jWEjPLaC0zc/L4hytJUveoqpXN86okl9MaxeBIJKkDTFClHlBVS4H+zRw6fDPnFnDamAclSdIEkGQqMKWqHm+23wb8V1ojjhyJJI0zE1RJkiRNZn3A5Umg9d34H6vq6iQ340gkadyZoEqSJGnSqqrlwEGbKf8VjkSSxt2UTgcgSZIkSRKYoEqSJEmSuoQJqiRJkiSpK5igSpIkSZK6gpMkSZIkDUEzy+uItObVkSRtiz2okiRJQ1BVW3zsf8aVWz0uSRoaE1RJkiRJUlcwQZUkSZIkdQUTVEmSJElSVzBBlSRJkiR1hSElqEn2SHJpkp8muTvJoUn2THJdknua52nNuUnyuSTLktye5JCxfQuSJEmSpF4w1B7UBcDVVfVy4CDgbmA+cH1VHQhc3+wDvAM4sHnMA74wqhFLktQDkrwsydK2x6+TfGhLPwBLkjQZbDNBTbI78AfAhQBV9WxVPQocAyxqTlsEvLvZPga4uFpuBPZIsveoRy5J0gRWVT+rqoOr6mDgdcCTwOVs+QdgSZJ63lB6UGcDq4GLkvwoyZeSTAX6quqB5pwHgb5mex/g/rbrVzRlkiRp8w4Hfl5V97HlH4AlSep5OwzxnEOAD1bVTUkWsMmvuVVVSYa1CnWSebSGALPffvsN51JJknrNCcDXmu0t/QC8kfa/o319fQwODo51jNoGPwNJ2n5DSVBXACuq6qZm/1JaCepDSfauqgeaIbyrmuMrgZlt1+/blG2kqhYCCwH6+/uHldxKktQrkuwEvAs4c9NjW/sBeNO/owMDA2MZprbl6qvwM5Ck7bfNIb5V9SBwf5KXNUWHA3cBVwAnNmUnAt9qtq8A/qSZzfeNwGNtvwRLkqSNvQO4raoeavYfWj93wyY/AEuS1POG0oMK8EHgq82vvMuBk2klt99IcgpwH3Bcc+5i4ChgGa0JH04e1YglSeot7+W3w3vhtz8An8vGPwBLktTzhpSgVtVSoH8zhw7fzLkFnLadcUmS1POaSQePAP6srfhcNv8DsCRJPW+oPaiSJGmUVdUTwF6blP2KzfwALEnSZDCUZWYkSZIkSRpzJqiSJEmSpK5ggipJkiRJ6gomqFIPSHJvkp8kWZrklqZszyTXJbmneZ7WlCfJ55IsS3J7kkM6G70kSZLUYoIq9Y63VNXBVbV+xu35wPVVdSBwfbMPrTUXD2we84AvjHukkiRJ0maYoEq96xhgUbO9CHh3W/nF1XIjsEeSvTsRoCRJktTOBFXqDQVcm+TWJPOasr6qeqDZfhDoa7b3Ae5vu3ZFU7aRJPOS3JLkltWrV49V3JIkSdIGroMq9YY3V9XKJDOA65L8tP1gVVWSGs4LVtVCYCFAf3//sK6VJEmSRsIeVKkHVNXK5nkVcDkwB3ho/dDd5nlVc/pKYGbb5fs2ZZIkSVJHmaBKE1ySqUlesH4beBtwB3AFcGJz2onAt5rtK4A/aWbzfSPwWNtQYEmSJp0kz0vyoyRXNvuzk9zUzHj/9SQ7NeU7N/vLmuOzOhm31ItMUKWJrw/4fpIfAz8Erqqqq4FzgSOS3AO8tdkHWAwsB5YBfw/8+fiHLElSVzkduLtt/9PA+VV1APAIcEpTfgrwSFN+fnOepFHkPajSBFdVy4GDNlP+K+DwzZQXc
No4hCZJUtdLsi9wNPBJ4MNJAhwG/HFzyiLgv9Balu2YZhvgUuDvkqT52yppFNiDKkmSpMnss8BfAuua/b2AR6tqbbPfPtv9hpnwm+OPNedLGiX2oEqSJGlSSvJOYFVV3ZpkYJRfex4wD6Cvr4/BwcHRfHkN05o1a/wMJggTVEmSJE1Wc4F3JTkK2AV4IbAA2CPJDk0vafts9+tnwl+RZAdgd+BXm3vhTZdrGxgYGMv3oW0YHBzEz2BicIivJEmSJqWqOrOq9q2qWcAJwHeq6n3AEuDY5rRNZ8JfP0P+sc353n8qjSITVEmSOiTJHkkuTfLTJHcnOTTJnkmuS3JP8zyt03FKk9AZtCZMWkbrHtMLm/ILgb2a8g8D8zsUn9SzHOIrSVLnLACurqpjm3UWdwP+Cri+qs5NMp/WF+AzOhmkNBlU1SAw2GwvB+Zs5pyngT8a18CkScYeVEmSOiDJ7sAf0PTMVNWzVfUorWUsFjWnLQLe3ZkIJUkaf/agSpLUGbOB1cBFSQ4CbgVOB/qq6oHmnAeBvs1d7Ayh3cfPQJK2nwmqJEmdsQNwCPDBqropyQI2uZ+tqirJZidgcYbQLnP1Vc4QKkmjYEhDfJPcm+QnSZYmuaUp2+wkDmn5XJJlSW5PcshYvgFJkiaoFcCKqrqp2b+UVsL6UJK9AZrnVR2KT5KkcTece1DfUlUHV1V/sz+f1iQOBwLX89tffd8BHNg85gFfGK1gJUnqFVX1IHB/kpc1RYcDd7HxMhbty1tIktTztmeI7zHAQLO9iNasZ2c05Rc3a0Ld2Eyhv3fb/TSSJKnlg8BXmxl8lwMn0/rx+BtJTgHuA47rYHySJI2roSaoBVzb3Afzxea+ly1N4rAPcH/btSuaso0S1PbJHfbbb7+RRS9J0gRWVUuB/s0cOny8Y5EkqRsMNUF9c1WtTDIDuC7JT9sPbm0Shy3ZdHKH4VwrSZIkSeo9Q7oHtapWNs+rgMtpLVy8pUkcVgIz2y7ftymTJEmSJGmLtpmgJpma5AXrt4G3AXew5UkcrgD+pJnN943AY95/KkmSJEnalqEM8e0DLk+y/vx/rKqrk9zM5idxWAwcBSwDnqQ14YMkSZIkSVu1zQS1qpYDB22m/FdsZhKHZvbe00YlOkmSJEnSpDGcdVAlSZIkSRozJqiSJEmSpK5ggipJkiRJ6gomqJIkSZKkrmCCKkmSJEnqCiaokiRJkqSuYIIqSZIkSeoKJqhSj0jyvCQ/SnJlsz87yU1JliX5epKdmvKdm/1lzfFZnYxbkiRJWs8EVeodpwN3t+1/Gji/qg4AHgFOacpPAR5pys9vzpMkSZI6zgRV6gFJ9gWOBr7U7Ac4DLi0OWUR8O5m+5hmn+b44c35kiRJUkeZoEq94bPAXwLrmv29gEeram2zvwLYp9neB7gfoDn+WHO+JEmS1FEmqNIEl+SdwKqqunWUX3dekluS3LJ69erRfGlJjST3JvlJkqVJbmnK9kxyXZJ7mudpnY5TkqTxYoIqTXxzgXcluRe4hNbQ3gXAHkl2aM7ZF1jZbK8EZgI0x3cHfrXpi1bVwqrqr6r+6dOnj+07kCa3t1TVwVXV3+zPB66vqgOB65t9SZImBRNUaYKrqjOrat+qmgWcAHynqt4HLAGObU47EfhWs31Fs09z/DtVVeMYsqSta79PvP3+cUmSet4O2z5F0gR1BnBJknOAHwEXNuUXAv+QZBnwMK2kVlJnFHBtkgK+WFULgb6qeqA5/iDQt7kLk8wD5gH09fUxODg4DuFqa/wMJGn7maBKPaSqBoHBZns5MGcz5zwN/NG4BiZpS95cVSuTzACuS/LT9oNVVU3y+juaZHYhQH9/fw0MDIx5sNqKq6/Cz0CStp9DfCVJ6pCqWtk8rwIup/Wj0kNJ9gZonld1LkJJksaXCaokSR2QZGqSF6zfBt4G3MHG94m33z8uSVLPM0GVJKkz+oDvJ/kx8EPgqqq6GjgXOCLJPcBbm31JYyTJL
kl+mOTHSe5McnZTPjvJTUmWJfl6kp2a8p2b/WXN8VmdjF/qNd6DKklSBzT3iR+0mfJfAYePf0TSpPUMcFhVrUmyI60fjr4NfBg4v6ouSXIBcArwheb5kao6IMkJwKeB4zsVvNRr7EGVJEnSpFUta5rdHZtH0VpX/NKmvH3Jp/aloC4FDk+ScQpX6nlD7kFN8jzgFmBlVb0zyWzgEmAv4Fbg/VX1bJKdgYuB1wG/Ao6vqntHPXJJkiRpFDTfc28FDgA+D/wceLSq1janrAD2abb3Ae4HqKq1SR6j9X34l5u8pktBdZE1a9b4GUwQwxniezpwN/DCZv/TOOxBkiRJE1xVPQccnGQPWjNqv3wUXtOloLrI4OCgS0FNEEMa4ptkX+Bo4EvNfnDYgyRJknpIVT0KLAEOBfZIsr4zZ19gZbO9EpgJ0BzfndaoQUmjYKj3oH4W+EtgXbO/F0Mc9gCsH/YgSZIkdZUk05ueU5LsChxBa9TgEuDY5rT2JZ/al4I6FvhOVdX4RSz1tm0mqEneCayqqltHs+Ik85LckuSW1atXj+ZLS5IkSUO1N7Akye3AzcB1VXUlcAbw4STLaHW2XNicfyGwV1P+YWB+B2KWetZQ7kGdC7wryVHALrTuQV1AM+yh6SXd3LCHFVsb9rDpuPztfSOSJEnScFXV7cDvb6Z8OTBnM+VPA380DqFJk9I2e1Cr6syq2reqZgEn0BrG8D4c9iBJkiRJGkXbsw6qwx4kSZIkSaNmOMvMUFWDwGCz7bAHSZIkSdKo2Z4eVEmSJEmSRo0JqiRJkiSpK5igSpIkSepJF1xwAdOmTePoo49m2rRpXHDBBZ0OSdswrHtQJUmSJGkiuOCCC/jQhz7Ea1/7Wp555hl23nlnPvShDwFw6qmndjg6bYk9qJIkSZJ6zsc+9jFe+tKXcuWVV7JgwQKuvPJKXvrSl/Kxj32s06FpK0xQJUnqoCTPS/KjJFc2+7OT3JRkWZKvJ9mp0zFK0kT01FNPsWTJEmbMmAHAjBkzWLJkCU899VSHI9PWmKBKktRZpwN3t+1/Gji/qg4AHgFO6UhUktQDVq9evdV9dR8TVEmSOiTJvsDRwJea/QCHAZc2pywC3t2Z6CRpYnvJS15Cf38/d955JwB33nkn/f39vOQlL+lwZNoaJ0mSJKlzPgv8JfCCZn8v4NGqWtvsrwD26URgkjTRXXTRRfzhH/4hBx10EOvWrWPKlCnstddeXHTRRZ0OTVthgipJUgckeSewqqpuTTIwguvnAfMA+vr6GBwcHN0ANWx+BlL3mTZtGgcccABr1qzh+c9/Pr/61a86HZK2wQRVkqTOmAu8K8lRwC7AC4EFwB5Jdmh6UfcFVm7u4qpaCCwE6O/vr4GBgXEJWltw9VX4GUjd5eMf/zjf+973mDFjBoODgwwMDLBq1SqOP/54lixZ0unwtAXegypNcEl2SfLDJD9OcmeSs5vyzc4EmmTnZn9Zc3xWJ+OXJquqOrOq9q2qWcAJwHeq6n3AEuDY5rQTgW91KERJmtDWrVu3YQbf9WbMmMG6des6FJGGwgRVmvieAQ6rqoOAg4Ejk7yRLc8EegrwSFN+fnOepO5xBvDhJMto3ZN6YYfjkaQJacqUKaxatWqjslWrVjFliilQN/PTkSa4alnT7O7YPIotzwR6TLNPc/zwZuZQSR1SVYNV9c5me3lVzamqA6rqj6rqmU7HJ0kT0TnnnMNRRx21IUldtWoVRx11FOecc06HI9PWeA+q1AOSPA+4FTgA+Dzwc7Y8E+g+wP0AVbU2yWO0eml+Oa5BS5IkjaG5c+eyYMECjj/+eB5++GH23HNPFixYwNy5czsdmrbCBFXqAVX1HHBwkj2Ay4GXb+9rts8Qut9++23vy0mSJI27uXPnsmTJkg2TJKn7OcRX6iFV9SitCVYOpZkJtDnUPhPoSmAmQHN8d+B35lyvqoVV1V9V/dOnTx/z2CVJkkbbBRdcwLRp0zj66KOZNm0aF
1xwQadD0jbYgypNcEmmA7+pqkeT7AocQWvio/UzgV7CxjOBXtHs39Ac/05V1bgHLkmSNIYuuOACPvKRj/DDH/6Q1atXM336dObMmQPAqaee2uHotCUmqNLEtzewqLkPdQrwjaq6MsldwCVJzgF+xG9nAr0Q+IdmhtCHaS1vIUmS1FPOPPNMvvnNb3LppZeyfPlyXvKSl/DNb36T4447zgS1i5mgShNcVd0O/P5mypcDczZT/jTwR+MQmiRNKAedfS2PPfWbEV8/a/5Vw75m91135MdnvW3EdUrasrVr13LNNddw9tlnc/PNN/P617+es846i7Vr1277YnWMCaokSRLw2FO/4d5zjx7RtSOdgGUkSa2koVm7di3HH388U6dOBWDq1Kkcf/zxfP7zn+9wZNoaJ0mSJEmS1HMOO+wwBgYGuPPOOwG48847GRgY4LDDDutwZNqabfagJtkF+B6wc3P+pVV1VpLZtCZf2YvW+ovvr6pnk+wMXAy8jtbMoMdX1b1jFL8kSZIk/Y45c+aw22678ZrXvIaqIgnvec97ePWrX93p0LQVQ+lBfQY4rKoOAg4GjkzyRlqzhJ5fVQcAjwCnNOefAjzSlJ/fnCdJkiRJ42aHHXbgm9/8JjfddBNLlizhpptu4pvf/CY77OBdjt1smwlqtaxpdndsHgUcBlzalC8C3t1sH9Ps0xw/PElGLWJJkiRJ2obzzjuPxYsXs3jxYi666KIN2+edd16nQ9NWDOnng2b5iluBA4DPAz8HHq2q9VNgrQD2abb3Ae4HqKq1SR6jNQz4l5u85jxgHsB+++23fe9CkiRJktqsW7eOI444giOOOGKjiczWrVvX2cC0VUOaJKmqnquqg4F9aS1b8fLtrbiqFlZVf1X1T58+fXtfTpIkSZI2mDJlyoYJkta78847mTLFeWK72bA+nap6FFgCHArskWR9D+y+wMpmeyUwE6A5vjutyZIkSZKkrpJkZpIlSe5KcmeS05vyPZNcl+Se5nlaU54kn0uyLMntSQ7p7DvQlnzqU59izpw5G83iO2fOHD71qU91ODJtzVBm8Z0O/KaqHk2yK3AErYmPlgDH0prJ90TgW80lVzT7NzTHv1NVNQaxS5IkSdtrLfCRqrotyQuAW5NcB5wEXF9V5yaZD8wHzgDeARzYPN4AfKF5Vpc59dRTAXjzm9/Ms88+y0477cRnPvOZDeXqTkPpQd0bWJLkduBm4LqqupJWA/1wkmW07jG9sDn/QmCvpvzDtBqzJEmS1HWq6oGquq3Zfhy4m9acKu0Tf246IejFzUSiN9IaVbj3OIetITr11FN55JFHuOqqq3jkkUdMTieAbfagVtXtwO9vpnw5rftRNy1/GvijUYlOkqQeNdx1xjsXqTR5JJlF63vvTUBfVT3QHHoQ6Gu2N0wI2lg/WegDbWUbTQja19fH4ODgWIWtIVizZo2fwQThIkCSJHXG+nXG1yTZEfh+km/TGn10flVdkuQCWuuLf6GTgUqTQZLnA5cBH6qqX7evklhVlWRYt6xV1UJgIUB/f3+tn0FWndE+i6+6m1NYSZLUASNYZ1zSGGl+JLoM+GpV/XNT/ND6obvN86qmfMOEoI32yUIlbSd7UCVJ6pBhrjNZxD0rAAAYI0lEQVS+6bUOHxwDI/3vuD3DB/3sOiutrtILgbur6m/aDq2f+PNcfndC0A8kuYTW5EiPtQ0FlrSdTFAlSeqQqnoOODjJHsDlDGOdcYcPjoGrrxrxEMARDx/cjjo1auYC7wd+kmRpU/ZXtBLTbyQ5BbgPOK45thg4ClgGPAmcPL7hSr3NBFWSpA5rlnLbaJ3xphfVoYPSGKuq7wPZwuHDN3N+AaeNaVDSJOY9qJIkdUCS6U3PKW3rjN/Nb9cZh42HFUqS1PPsQZUkqTP2BhY196FOAb5RVVcmuQu4JMk5wI/47TrjkiT1PBNUSZI6YLjrjEuSNBk4xFeSJEmS1BVMUCVJkiT1pB/84Ae85S1v4
fTTT+ctb3kLP/jBDzodkrbBIb6SJEmSes4PfvADTj/9dBYvXsxdd93FK1/5So466igWLFjA3LlzOx2etsAEVZIkCXjBK+bzmkXzR/4Ci0ZSJ8DRI69T0hZ9/OMfZ/HixcyYMYO77rqLGTNmsHjxYo4//niWLFnS6fC0BSaokiRJwON3n8u9544sWRwcHGRgYGDY182af9WI6pO0bevWrWPGjBkblc2YMYN169Z1KCINhfegSpIkSeo5U6ZMYdWqVRuVrVq1iilTTIG6mT2okiRJknrOOeecwyte8QoeeeQRqookTJs2jSuuuKLToWkrTFClCS7JTOBioA8oYGFVLUiyJ/B1YBZwL3BcVT2SJMAC4CjgSeCkqrqtE7GPxEiHw4102J4kSZqYFi1axMMPP8yrX/1qpkyZwrp167jjjjtYtGiRkyR1Mfu3pYlvLfCRqnol8EbgtCSvBOYD11fVgcD1zT7AO4ADm8c84AvjH7IkSdLYuvDCC/nMZz7Di170IgBe9KIX8ZnPfIYLL7yww5Fpa+xBlSa4qnoAeKDZfjzJ3cA+wDHAQHPaImAQOKMpv7iqCrgxyR5J9m5eR5IkqSesW7eOf/zHf/ydZWacJKm72YMq9ZAks4DfB24C+tqSzgdpDQGGVvJ6f9tlK5qyTV9rXpJbktyyevXqMYtZkiRprHzsYx/bMJPvjBkz+NjHPtbhiLQt9qBKPSLJ84HLgA9V1a9bt5q2VFUlqeG8XlUtBBYC9Pf3D+taSZKkTnvxi1/MCSecwB577MHOO+/MNddcwwknnMCLX/ziToemrTBBlXpAkh1pJadfrap/boofWj90N8newPp51lcCM9su37cpkyRJ6hkvf/nLOfzwwzcM650yZQrve9/7WLnSrz3dzCG+0gTXzMp7IXB3Vf1N26ErgBOb7ROBb7WV/0la3gg85v2nkiSp15xzzjl8+9vfpjXtBlQV3/72tznnnHM6HJm2ZpsJapKZSZYkuSvJnUlOb8r3THJdknua52lNeZJ8LsmyJLcnOWSs34Q0yc0F3g8clmRp8zgKOBc4Isk9wFubfYDFwHJgGfD3wJ93IGZJkqQxtX6ZmVe96lW89rWv5VWvehUPP/wwixYt6nRo2oqhDPFdv4TFbUleANya5DrgJFpLWJybZD6tJSzOYOMlLN5AawmLN4xF8JKgqr4PZAuHD9/M+QWcNqZBSdIENdK1lgG4evjX7r7rjiOvT9JWXXjhhVx99dW8/e1vZ3BwkIGBAa655hqOOuooFi5c2OnwtAXbTFBdwkKSpNGXZCZwMa0ZtgtYWFULkuwJfB2YBdwLHFdVj3Qqzsnk3nOPHvG1s+ZftV3XSxp969at4+1vf/tGZW9/+9tdZqbLDeseVJewkCRp1KwfofRK4I3AaUleSWtE0vVVdSBwfbMvSRqmKVOmcM0112xUds011zBlitPwdLMhfzqbLmHRfqzpLR32EhZV1V9V/dOnTx/OpZIkTXhV9UBV3dZsPw60j1Baf4PUIuDdnYlQkia2U045hSOPPHJDknrNNddw5JFHcsopp3Q4Mm3NkJaZcQkLSZLGzhBHKG16zTxgHkBfXx+Dg4NjHqe2zs9A6i4LFy7kzjvv5Mgjj9xQ9qY3vcn7T7vcNhPUISxhcS6/u4TFB5JcQmtyJJewkCRpCzYdodT6s9tSVZVksyOUqmohsBCgv7+/BgYGxiFabdHVV+FnIHWXCy64gKVLl3LHHXewevVqpk+fzpw5c7jgggs49dRTOx2etmAoPajrl7D4SZKlTdlf0UpMv5HkFOA+4Ljm2GLgKFpLWDwJnDyqEUuS1COGOUJJkjQMZ555Jp/85Cd55zvfyZNPPsluu+3GJz/5Sc4880wT1C42lFl8XcJCkqRRNoIRSpKkYXj66af5xCc+QRKee+45nnrqKT7xiU+wdu3aToemrRjSPaiSJGnUDXeEkiRpGJ5++ml23HFHDj74YJ555hl23nlnli5dym9+85tOh
6atMEGVJKkDhjtCSZI0fL/5zW82JKTt2+peJqiSJEmSelJfXx/Lly9n7dq17LDDDvT19fHQQw91OixthavUSpIkadJK8uUkq5Lc0Va2Z5LrktzTPE9rypPkc0mWJbk9ySGdi1xD8eyzz7J06VKuuuoqli5dyrPPPtvpkLQN9qBKkiRpMvsK8HfAxW1l84Hrq+rcJPOb/TOAdwAHNo83AF9ontWFdtllFx5//HHe9KY3sW7dOqZMmcLjjz/OLrvs0unQtBX2oEqSJGnSqqrvAQ9vUnwMsKjZXgS8u6384mq5EdijWQ5KXegVr3gFM2bM4Mknn2TNmjU8+eSTzJgxg1e84hWdDk1bYQ+qJEmStLG+qnqg2X4Q6Gu29wHubztvRVP2AJtIMg+YB637IAcHB8csWG3eySefzF//9V8zc+bMDWW//vWvOfnkk/08upgJqiRJkrQFVVVJagTXLQQWAvT399fAwMBoh6ZtGBgY4JBDDuHjH/84Dz/8MHvuuSdf/OIXmTt3bqdD01aYoEqSJEkbeyjJ3lX1QDOEd1VTvhKY2Xbevk2ZutTcuXNZsmQJg4OD+CPBxGCCKkmSJG3sCuBE4Nzm+Vtt5R9IcgmtyZEeaxsKrA5LtrS09LZVDbuTXGPESZIkSZI0aSX5GnAD8LIkK5KcQisxPSLJPcBbm32AxcByYBnw98CfdyBkbUFVbfGx/xlXbvW4uoc9qJIkSZq0quq9Wzh0+GbOLeC0sY1ImtzsQZUkSZIkdQUTVEmSJElSVzBBlSRJkiR1BRNUSZIkSVJXMEGVJEmSJHUFE1RJkiRJUlcwQZUmuCRfTrIqyR1tZXsmuS7JPc3ztKY8ST6XZFmS25Mc0rnIpcltOG1XkqTJwnVQNzFr/lUjuu7ec48e5UikIfsK8HfAxW1l84Hrq+rcJPOb/TOAdwAHNo83AF9oniWNv68w9LYrSdKkYA+qNMFV1feAhzcpPgZY1GwvAt7dVn5xtdwI7JFk7/GJVFK7YbZdSZImhW32oCb5MvBOYFVVvbop2xP4OjALuBc4rqoeSRJgAXAU8CRwUlXdNjahS9qKvqp6oNl+EOhrtvcB7m87b0VT9gCbSDIPmAew3377jV2kktptqe3+jvY22tfXx+Dg4NhHp63yM5Ck7TeUIb5fweGD0oRVVZWkRnDdQmAhQH9//7Cvl7R9ttV2N22jAwMD4xWaNufqq/AzkKTtt80hvg4flCakh9a3veZ5VVO+EpjZdt6+TZmk7rCltitJ0qQw0ntQhzt88HckmZfkliS3rF69eoRhSNqCK4ATm+0TgW+1lf9JM5vvG4HH2tqypM7bUtuVJGlS2O5JkqqqgBENH6yq/qrqnz59+vaGIU1aSb4G3AC8LMmKJKcA5wJHJLkHeGuzD7AYWA4sA/4e+PMOhCyJYbddSZImhZEuM/NQkr2r6gGHD0qdVVXv3cKhwzdzbgGnjW1EkoZiOG1XkqTJYqQ9qA4flCRJkiSNqqEsM/M1YAB4UZIVwFm0hhx9oxmOdB9wXHP6YlpLzCyjtczMyWMQsyRJkiSpB20zQXX4oCRJkiRpPIz0HlSNglnzrxrRdfeee/QoRyJJkiRJnbfds/hKkiRJkjQaTFAlSZIkSV3BIb6SJEmSut5BZ1/LY0/9ZsTXj+T2ut133ZEfn/W2Edep4TNBlSRJktT1HnvqNyOei2VwcJCBgYFhXzfSOWM0cg7xlSRJkiR1BRNUSZIkSVJXMEGVJEmSJHUFE1RJkiRJUldwkiRJkiRJXe8Fr5jPaxbNH/kLLBpJnQAjm5hJI2OCKkmSJKnrPX73uc7iOwk4xFeSJEmS1BXsQZUkSZKGIcmRwALgecCXqurcDoc0aWxXj+bVw7929113HHl9GhETVEmSuoxffqXuleR5wOeBI4AVwM1JrqiquzobWe8b6fBeaCW223O9xo9DfCVJ6iJtX37fAbwSeG+SV3Y2Kklt5gDLqmp5VT0LXAIc0+GYpJ5hD6okb
cFIhxH5C62204YvvwBJ1n/5tXemw5Js/fint3ysqkY5GnXQPsD9bfsrgDdselKSecA8gL6+PgYHB8cluMnsLW95y1aPb62NLlmyZJSj0UiZoEqS1F388tultvYFds2aNTz/+c/f4nE/n8mnqhYCCwH6+/trJDPIani29kPQSGfx1fgzQZUkaQLyy2938cvvpLISmNm2v29TJmkUeA+qJEndxS+/Une7GTgwyewkOwEnAFd0OCapZ9iDKkldYrzvefUe26614csvrcT0BOCPOxuSpPWqam2SDwDX0Jpp+8tVdWeHw5J6hgmqJEldxC+/UverqsXA4k7HIfUiE1RJkrqMX34lSZPVmCSoLjAudTfbqHqdw5clSZqYRj1BbVtg/AhaU+PfnOSKqnL9tg7r9S9svf7+RottVJ0ykjY62dqnJEmT3Vj0oLrAuIDeThgn+HuzjUqSJKkrZWsL2o7oBZNjgSOr6k+b/fcDb6iqD2xy3oYFxoGXAT8bQXUvAn65HeF2c329/N56vb6R1rV/VU0f7WA2ZRudkHX1en0T5b2NSxsdiSSrgfs6HcckN97/jvW7bKPaGtto5w2pjXZskqT2BcZHKsktVdU/SiF1VX29/N56vb7xfm9jxTbaPXX1en29/N7GS7d+KZ9MevHflUaPbbTzbKMTx5QxeE0XGJe6m21UkiRJXWksEtQNC4wn2YnWAuNXjEE9kkbGNipJkqSuNOpDfMd5gfHtGn7Y5fX18nvr9frG+70Ni210QtbV6/X18nvT5OG/K6m72UYniFGfJEmSJEmSpJEYiyG+kiRJkiQNmwmqJEmSJKkrTMgENcmRSX6WZFmS+eNQ35eTrEpyxzjUNTPJkiR3JbkzyeljXN8uSX6Y5MdNfWePZX1Nnc9L8qMkV45DXfcm+UmSpUluGYf69khyaZKfJrk7yaFjXWc3so2Oan220dGt7z82/x3vSPK1JLuMdZ3qDUnenaSSvLzTsUj6rSTPNX9D1j9mdTombZ8Jdw9qkucB/wIcAaygNSPpe6vqrjGs8w+ANcDFVfXqsaqnqWtvYO+qui3JC4BbgXeP1ftLEmBqVa1JsiPwfeD0qrpxLOpr6vww0A+8sKreOVb1NHXdC/RX1bgszJxkEfC/qupLzQy5u1XVo+NRd7ewjY56fbbR0atrH1r//V5ZVU8l+QawuKq+MtZ1a+JL8nXg94DvVNVZnY5HUkuSNVX1/BFct0NVrR2LmLR9JmIP6hxgWVUtr6pngUuAY8aywqr6HvDwWNbRVtcDVXVbs/04cDewzxjWV1W1ptndsXmM2a8WSfYFjga+NFZ1dEqS3YE/AC4EqKpnJ1ty2rCNjm59ttHRtQOwa5IdgN2Af+twPJoAkjwfeDNwCq2luUgyJcn/aEbMXJdkcZJjm2OvS/LdJLcmuab5YUvSOEkyK8n/SnJb83hTUz7QlF8B3NWU/ftmpNLSJF9sfmhXB03EBHUf4P62/RWM4ZfDTmqGKPw+cNMY1/O8JEuBVcB1VTWW9X0W+Etg3RjW0a6Aa5svCfPGuK7ZwGrgomZ45JeSTB3jOruRbXT067GNjkZFVSuB84B/BR4AHquqa8eyTvWMY4Crq+pfgF8leR3wh8As4JXA+4FDAZqRDn8LHFtVrwO+DHyyE0FLk8SubcN7L2/KVgFHVNUhwPHA59rOP4TWSKR/l+QVzfG5VXUw8BzwvvEMXr9r1NdB1ehofq29DPhQVf16LOuqqueAg5PsAVye5NVVNer38iV5J7Cqqm5NMjDar78Fb66qlUlmANcl+WnT2zYWdqD1P70PVtVNSRYA84FPjFF96iDb6KgZtzaaZBqtRGM28CjwT0n+fVX9z7GoTz3lvcCCZvuSZn8H4J+qah3wYJIlzfGXAa+m9e8ZWutNPzC+4UqTylNNctluR+DvkqxPOv9d27EfVtUvmu3DgdcBNzftdVdaya06aCImqCuBmW37+zZlPaP59fUy4KtV9c/jVW9VPdr8g
T0SGIvJZuYC70pyFLAL8MIk/7Oq/v0Y1AVs6DGhqlY1v6rNAcYqQV0BrGjr3bqUVoI62dhGx4htdLu9FfhFVa0GSPLPwJsAE1RtUZI9gcOA1yQpWglnAZdv6RLgzqqalJPkSV3iPwIPAQfRGjH6dNuxJ9q2AyyqqjPHMTZtw0Qc4nszcGCS2c0kNCcAV3Q4plHTTIhyIXB3Vf3NONQ3vemVIcmutCa2+elY1FVVZ1bVvlU1i9bn9p2x/OKbZGoziQ3NUNu3MTZf6gGoqgeB+5O8rCk6nOb+hknGNjq69dlGR8+/Am9MslvzOR5O6x5iaWuOBf6hqvavqllVNRP4Ba373t/T3IvaBww05/8MmJ5mFvckOyZ5VScClyax3YEHmhEO76f1w9LmXA8c24ziIcmeSfYfpxi1BRMuQW1m2/oAcA2tLxbfqKo7x7LOJF8DbgBelmRFklPGsLq5tBrSYW3j6Y8aw/r2BpYkuZ1WYnFdVY350hLjpA/4fpIfAz8Erqqqq8e4zg8CX23+ex4M/Pcxrq/r2EZHnW10lDSjGy4FbgN+Qutv4MKxqk894738bm/pZcCLaY2cuYtWL/xttO5rfpZWUvvp5t/2Ulo99ZLGz/8ATmza4MvZuNd0g2YG/o/TmgvhduA6Wn931UETbpkZSZKkbpDk+c0SUHvR+pFlbjOaRpI0QhPxHlRJkqRucGUzBH8n4L+ZnErS9rMHVZIkSZLUFSbcPaiSJEmSpN5kgipJkiRJ6gomqJIkSZKkrmCC2oOSPNcsfXFHkn9KstsovOZJSf5uNOKTBEn+U5I7k9zetNc3JPlSklc2x9ds4bo3JrmpuebuJP9lXAOXJEkaQ87i25ueqqqDAZJ8FTgV+JuhXJjkeVX13FgGJ012SQ4F3gkcUlXPJHkRsFNV/ekQLl8EHFdVP07yPOBlYxmrJEnSeLIHtff9L+AAgCTfTHJr02szb/0JSdYk+UyzmPGhSV6f5H8n+XGSHyZ5QXPq7yW5Osk9Sf7fDrwXqVfsDfyyqp4BqKpfVtW/JRlM0r/+pCTnN+31+iTTm+IZwAPNdc81i4yT5L8k+YckNzRt9P8Z5/ckSZK03UxQe1iSHYB3AD9piv5DVb0O6Af+ollYHGAqcFNVHURrofGvA6c3+28FnmrOOxg4HngNcHySmePzTqSecy0wM8m/JPkfSf7PzZwzFbilql4FfBc4qyk/H/hZksuT/FmSXdqueS1wGHAo8J+T/N4YvgdJkqRRZ4Lam3ZNshS4BfhX4MKm/C+aXtIbgZnAgU35c8BlzfbLgAeq6maAqvp1Va1tjl1fVY9V1dPAXcD+Y/9WpN5TVWuA1wHzgNXA15OctMlp62j9WATwP4E3N9f+V1o/Ml0L/DFwdds136qqp6rql8ASYM5YvQdJkqSx4D2ovWnDPajrJRmg1Rt6aFU9mWQQWN/z8vQQ7zt9pm37Ofz3I41Y0+YGgcEkPwFO3NYlbdf+HPhCkr8HVreNhqgtXSNJkjQR2IM6eewOPNIkpy8H3riF834G7J3k9QBJXtAMFZY0SpK8LMmBbUUHA/dtctoU4Nhm+4+B7zfXHp0kTfmBtH4serTZPybJLk3COgDcPAbhS5IkjRkTj8njauDUJHfTSkJv3NxJVfVskuOBv02yK637T986fmFKk8LzabWxPYC1wDJaw30vbTvnCWBOko8Dq2jd/w3wfuD8JE82176vqp5rctbbaQ3tfRHw36rq38bjzUiSJI2WVDkCTJImumY91DVVdV6nY5EkSRoph/hKkiRJkrqCPaiSJEmSpK5gD6okSZIkqSuYoEqSJEmSuoIJqiRJkiSpK5igSpIkSZK6wv/ffh0LAAAAAAzyt57GjrJIUAEAAFgIYDK33YSXCkgAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axs = plt.subplots(1, 4, figsize=(16, 4))\n", "hdf_filled.cols['Parch'].hist(ax=axs[0])\n", "hdf_filled.cols['SibSp'].hist(ax=axs[1])\n", "hdf_filled.cols['Age'].boxplot(ax=axs[2], k=3)\n", "hdf_filled.cols['Fare'].boxplot(ax=axs[3], k=3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### So, it seems Tukey's method was very conservative... let's stick with the `Fare` column only!\n", "\n", "### This column has continuous values in a wide range of values... how about taking the `log` of it?\n", "\n", "## 2.5 Enter `assign`!\n", "\n", "### Using `assign`, it is possible to easily append a new column to the dataframe using a `lambda` function with column name(s) as argument(s)!\n", "\n", "### Under the hood, `HandySpark` will convert it to a `pandas udf` for better performance!" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "hdf_filled = hdf_filled.assign(logFare=lambda Fare: np.log(Fare + 1))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEaZJREFUeJzt3X2MZXV9x/H3R1Cq+LDoTgkFdEHR2pq6mAm1RQkVH1AMSNsgxFq16kKjjca2BpX6FG3Qihpbi1kFwQgoikRa0UqokZoUdRYRlicFupRdV3YQH0CNFvj2jzkbr+vszsw9d7hzf/t+JTdzzu88fU82+5nf/O7vnpuqQpLUrgeNuwBJ0vIy6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQqxlJNiV59rjrkFYag14akOScJL9Mcs/A68XjrkvqY89xFyCtQO+tqtP6nCDJnlV176gKkvqwR6/mJNkryQeTfK97fTDJXgPb35hka7ftVUkqyRMWcd7Tktya5O4k1yU5dmDbq5JckeRDSe4CThtovzHJD5N8McmBy3LT0i4Y9GrRW4CnA2uBpwKH8avgPRp4A/Bs4AnAkUs473eAw4FHAe8Gzk+y78D2PwZuAKaA9yT5M+DvgeO6tq8D5w97U9KwDHq16CXAO6tqW1XNAu8AXtptOwH4eFVdV1U/A94+z/F/l+RH3evO7Y1VdWFVba2q+6vqfGATMD1w3P9W1ZlVdV9V/Rw4BfjHqrqpG8Z5F3BYkv1HfcPSrhj0atHvALcNrN/WtW3fdvvAtsHl7d5XVau61+rtjUlenuTb238JAL8LrB44bsdzPQ748MD+dwL3AwcMdVfSkAx6teh7zIXsdo/t2gC28utBu6gx8yQHA2cCfw08pqpWATcCGdhtx0fB3g68cuCXxqqqemhVfX3xtyL1Z9CrRRcApyWZSrIaeCvwyW7bhcArkjw5ycOAf1jkOR/OXJDPAknyauZ69LvyEeAtSZ7M3EGrkvz5Eu9F6s2gV4veBcwA1wDXAld1bVTVF4EPAV8Bbgau7I75xa5OWFXXAP8MfIO5vwqexNybq7s65jPA+4HPJPlJV8/zhrojqYf4xSPanXW97Y3AXs57V6vs0Wu3k+T4bq79PsB7gH8z5NUyg167o5OBbcAtwH3MvcEqNcuhG0lqnD16SWrcinio2erVq2vNmjXjLkOSJsqGDRvurKqphfZbEUG/Zs0aZmZmxl2GJE2UJLctvJdDN5LUPINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1LgV8clY7R7WnPqFoY/ddPoxI6xE2r3Yo5ekxhn0ktQ4g16SGmfQS1LjDHpJatyCQZ/k7CTbkmwcaPt0kqu716YkV3fta5L8fGDbR5azeEnSwhYzvfIc4F+AT2xvqKoXb19Ocgbw44H9b6mqtaMqUJLUz4JBX1VXJFkz37YkAU4AnjXasiRJo9J3jP6ZwB1V9d2BtoOSfCvJV5M8c2cHJlmXZCbJzOzsbM8yJEk70zfoTwIuGFjfCjy2qg4F3gCcn+SR8x1YVeurarqqpqemFvxuW0nSkIYO+iR7An8KfHp7W1X9oqp+0C1vAG4Bnti3SEnS8Pr06J8N3FhVm7c3JJlKske3fDBwCHBrvxIlSX0sZnrlBcB/A09KsjnJK7tNJ/LrwzYARwDXdNMtPwucUlV3jbJgSdLSLGbWzUk7aX/5PG0XARf1L0uSNCp+MlaSGmfQS1LjDHpJapzfMKUl6fMtUZLGwx69JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjVvMl4OfnWRbko0DbW9PsiXJ1d3rBQPb3pTk5iQ3JXnechUuSVqcxfTozwGOnqf9A1W1tntdCpDk94ATgd/vjvnXJHuMqlhJ0tItGPRVdQVw1yLPdxzwqar
6RVX9D3AzcFiP+iRJPfUZo39tkmu6oZ19urb9gdsH9tnctf2GJOuSzCSZmZ2d7VGGJGlXhg36M4HHA2uBrcAZSz1BVa2vqumqmp6amhqyDEnSQoYK+qq6o6ruq6r7gY/yq+GZLcCBA7se0LVJksZkqKBPst/A6vHA9hk5lwAnJtkryUHAIcA3+pUoSepjz4V2SHIBcCSwOslm4G3AkUnWAgVsAk4GqKrrklwIXA/cC7ymqu5bntIlSYuxYNBX1UnzNJ+1i/3fDby7T1GSpNHxk7GS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4xYM+iRnJ9mWZONA2z8luTHJNUkuTrKqa1+T5OdJru5eH1nO4iVJC1tMj/4c4Ogd2i4DnlJVfwB8B3jTwLZbqmpt9zplNGVKkoa1YNBX1RXAXTu0fbmq7u1WrwQOWIbaJEkjMIox+r8CvjiwflCSbyX5apJn7uygJOuSzCSZmZ2dHUEZkqT59Ar6JG8B7gXO65q2Ao+tqkOBNwDnJ3nkfMdW1fqqmq6q6ampqT5lSJJ2YeigT/Jy4IXAS6qqAKrqF1X1g255A3AL8MQR1ClJGtJQQZ/kaOCNwLFV9bOB9qkke3TLBwOHALeOolBJ0nD2XGiHJBcARwKrk2wG3sbcLJu9gMuSAFzZzbA5Anhnkv8D7gdOqaq75j2xJOkBsWDQV9VJ8zSftZN9LwIu6luUJGl0/GSsJDXOoJekxhn0ktS4Bcfopd3dmlO/MPSxm04/ZoSVSMOxRy9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcT4CQROhz2MIwEcRaPdmj16SGmfQS1LjDHpJapxBL0mNW1TQJzk7ybYkGwfaHp3ksiTf7X7u07UnyYeS3JzkmiRPW67iJUkLW2yP/hzg6B3aTgUur6pDgMu7dYDnA4d0r3XAmf3LlCQNa1FBX1VXAHft0HwccG63fC7wooH2T9ScK4FVSfYbRbGSpKXrM0a/b1Vt7Za/D+zbLe8P3D6w3+auTZI0BiN5M7aqCqilHJNkXZKZJDOzs7OjKEOSNI8+QX/H9iGZ7ue2rn0LcODAfgd0bb+mqtZX1XRVTU9NTfUoQ5K0K32C/hLgZd3yy4DPD7T/ZTf75unAjweGeCRJD7BFPesmyQXAkcDqJJuBtwGnAxcmeSVwG3BCt/ulwAuAm4GfAa8Ycc2SpCVYVNBX1Uk72XTUPPsW8Jo+RUmSRsdPxkpS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXGL+nLw+SR5EvDpgaaDgbcCq4BXA7Nd+5ur6tKhK5Qk9TJ00FfVTcBagCR7AFuAi4FXAB+oqveNpEJJUi+jGro5Crilqm4b0fkkSSMyqqA/EbhgYP21Sa5JcnaSfeY7IMm6JDNJZmZnZ+fbRZI0Ar2DPslDgGOBz3RNZwKPZ25YZytwxnzHVdX6qpququmpqam+ZUiSdmIUPfrnA1dV1R0AVXVHVd1XVfcDHwUOG8E1JElDGkXQn8TAsE2S/Qa2HQ9sHME1JElDGnrWDUCSvYHnACcPNL83yVqggE07bJMkPcB6BX1V/RR4zA5tL+1VkSRppPxkrCQ1zqCXpMYZ9JLUOINekhpn0EtS43rNupG0a2tO/cLQx246/ZgRVqLdmT16SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DinV2q30GeaozTp7NFLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxvWeXplkE3A3cB9wb1VNJ3k08GlgDXNfEH5CVf2w77UkSUs3qh79n1TV2qqa7tZPBS6vqkO
Ay7t1SdIYLNfQzXHAud3yucCLluk6kqQFjCLoC/hykg1J1nVt+1bV1m75+8C+Ox6UZF2SmSQzs7OzIyhDkjSfUTwC4RlVtSXJbwOXJblxcGNVVZLa8aCqWg+sB5ienv6N7ZKk0ejdo6+qLd3PbcDFwGHAHUn2A+h+but7HUnScHoFfZK9kzxi+zLwXGAjcAnwsm63lwGf73MdSdLw+g7d7AtcnGT7uc6vqi8l+SZwYZJXArcBJ/S8jiRpSL2CvqpuBZ46T/sPgKP6nFva3fV5tPKm048ZYSWadH4yVpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxo3iWTeSVpg+c/D7cg7/ymOPXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJatzQQZ/kwCRfSXJ9kuuSvK5rf3uSLUmu7l4vGF25kqSl6vNQs3uBv62qq5I8AtiQ5LJu2weq6n39y5Mk9TV00FfVVmBrt3x3khuA/UdVmCRpNEYyRp9kDXAo8PWu6bVJrklydpJ9RnENSdJwegd9kocDFwGvr6qfAGcCjwfWMtfjP2Mnx61LMpNkZnZ2tm8ZkqSd6BX0SR7MXMifV1WfA6iqO6rqvqq6H/gocNh8x1bV+qqarqrpqampPmVIknahz6ybAGcBN1TV+wfa9xvY7Xhg4/DlSZL66jPr5nDgpcC1Sa7u2t4MnJRkLVDAJuDkXhVKknrpM+vma0Dm2XTp8OVIkkbNLweXNFJ9vpjcLxZfHj4CQZIaZ9BLUuMMeklqnGP0kprgewM7Z49ekhrXRI/e3+SStHP26CWpcQa9JDXOoJekxjUxRi+pDX3eb9PO2aOXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc4PTElSTyv9wYrL1qNPcnSSm5LcnOTU5bqOJGnXlqVHn2QP4MPAc4DNwDeTXFJV1y/H9bQ0fsxc2r0sV4/+MODmqrq1qn4JfAo4bpmuJUnaheUao98fuH1gfTPwh4M7JFkHrOtW70lyU4/rrQbuHObAvKfHVUdn6PpXEO9hZZj0exhL/SPOgSXdQ89rP24xO43tzdiqWg+sH8W5ksxU1fQozjUOk14/eA8rxaTfw6TXDyvzHpZr6GYLcODA+gFdmyTpAbZcQf9N4JAkByV5CHAicMkyXUuStAvLMnRTVfcmeS3wH8AewNlVdd1yXKszkiGgMZr0+sF7WCkm/R4mvX5YgfeQqhp3DZKkZeQjECSpcQa9JDVuooN+0h+zkOTsJNuSbBx3LcNKcmCSryS5Psl1SV437pqWIslvJflGkm939b9j3DUNK8keSb6V5N/HXcswkmxKcm2Sq5PMjLueYSRZleSzSW5MckOSPxp3TTDBY/TdYxa+w8BjFoCTJukxC0mOAO4BPlFVTxl3PcNIsh+wX1VdleQRwAbgRZPy75AkwN5VdU+SBwNfA15XVVeOubQlS/IGYBp4ZFW9cNz1LFWSTcB0VU3sB76SnAv8V1V9rJtx+LCq+tG465rkHv3EP2ahqq4A7hp3HX1U1daquqpbvhu4gblPRk+EmnNPt/rg7jVxvZ8kBwDHAB8bdy27qySPAo4AzgKoql+uhJCHyQ76+R6zMDEB06Ika4BDga+Pt5Kl6YY8rga2AZdV1UTV3/kg8Ebg/nEX0kMBX06yoXtEyqQ5CJgFPt4NoX0syd7jLgomO+i1giR5OHAR8Pqq+sm461mKqrqvqtYy9wnuw5JM1DBakhcC26pqw7hr6ekZVfU04PnAa7qhzUmyJ/A04MyqOhT4KbAi3juc5KD3MQsrRDe2fRFwXlV9btz1DKv7M/srwNHjrmWJDgeO7ca4PwU8K8knx1vS0lXVlu7nNuBi5oZnJ8lmYPPAX4SfZS74x26Sg97HLKwA3ZuZZwE3VNX7x13PUiWZSrKqW34oc2/u3zjeqpamqt5UVQdU1Rrm/h/8Z1X9xZjLWpI
ke3dv5tMNdzwXmKjZaFX1feD2JE/qmo4CVsSkhIn9KsExPGZh5JJcABwJrE6yGXhbVZ013qqW7HDgpcC13Tg3wJur6tIx1rQU+wHndrO4HgRcWFUTOT1xwu0LXDzXb2BP4Pyq+tJ4SxrK3wDndZ3PW4FXjLkeYIKnV0qSFmeSh24kSYtg0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TG/T+hE9jsNAPAXQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "hdf_filled.cols['logFare'].hist(bins=20)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Those values to the extreme left of the histogram seem suspicious...\n", "\n", "### Let's make a stratified boxplot to try spotting outliers!" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1gAAAGoCAYAAABbkkSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHkdJREFUeJzt3X+Q3Hd93/HX+06yK4THJsXVNIFIaslQYgikObm1oOrZDkjgTNLUaUlIk5o6XOgkjDtwDfZYie3WCqZRPHFcJq6oUzMNhEIcZlpMJGjxoqQytuQUE4xJmtpW+ZXBNA0g4bEt3ad/+KTKxPhW1udub0+Px8wOur3vfv1ezd4HPe/73e9Way0AAACcuolRDwAAALBSCCwAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAGwJKrq4ar6wVHPAQCLSWABMDaq6raqeryqDp1we/2o5wKAY1aNegAAOEn/prW2/VR2UFWrWmtHeg0EAMc4ggXAkqqqM6vq16rqS/O3X6uqM0/4/i9U1Zfnv/czVdWq6kVD7Hd7VT1YVd+oqvur6odP+N7PVNXeqvr1qvrzJNtPuP9zVfV/q+r3quqFi/KkAThtCCwAltrVSf5uklckeXmS8/P/g2dbkrcm+cEkL0oyfRL7/ZMkr0xydpIdSd5XVetO+P7mJA8kOTfJO6vq0iT/MsmPzN93d5L3PdsnBQCJwAJg6f1kkn/VWvtKa+2RJNcl+an57/3jJP+htXZ/a+2bSa59msfPVtVfzN++euzO1toHWmtfbq3Ntdbel+ThJFMnPO5/t9Z+o7V2tLX2aJI3J/nl1tofz58ueH2S86vqu3o/YQBOHwILgKX2nUkOnvD1wfn7jn3v8yd878Q/H7OztXbO/O35x+6sqsuq6r5j8ZXkbyV5/gmP+9Z9rU/yrhO2/2qSuSQveFbPCgAisABYel/Kk3FzzHfP35ckX85TA2eo90RV1d9I8htJ/nmSv9paOyfJ55LUCZu1b3nY55NcfkKsndNaW9Nau3v4pwIATyWwAFhqv51ke1WdW1XPT/JLSX5r/nsfSPLGqnpJVT0nyS8Ouc/n5smAeiRJVdWb8uQRrGdyS5Krq+olefJB51TVj53kcwGApxBYACy165McSPLpJH+U5A/n70tr7feS/HqSO5P8aZJPzj/msWfaYWvt00luTnJPnjwK9uI8edGKZ3rMB5PcmOSDVfX1+Xm2PqtnBADzqrVvPWMCAJaH+aNLn0lyps+tAmAcOIIFwLJSVT86/1lZz0vyziT/RVwBMC4EFgDLzc8m+UqS/5XkaJ68cAUAjAWnCAIAAHTiCBYAAEAnqxZjp89//vPbhg0bFmPXrBCHDx/O2rVrRz0GMOasJUAv1hMWcu+99361tXbuQtstSmBt2LAhBw4cWIxds0IMBoNMT0+PegxgzFlLgF6sJyykqg4Os51TBAEAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6ERgAQAAdCKwAAAA
OhFYAAAAnQgsAACATgQWAABAJwILAACgE4HFkrr99tuzcePGvP71r8/GjRtz++23j3okAADoZtWoB+D0cfvtt+fnfu7nctddd+XgwYNZv359LrjggiTJpZdeOuLpAADg1DmCxZKZnZ3NXXfdlY0bNyZJNm7cmLvuuiuzs7MjngwAAPoQWCyZubm543F1zMaNGzM3NzeiiQAAoC+BxZKZmJjIQw899JT7HnrooUxMeBkCALAy+JctS2bnzp254IILjkfWQw89lAsuuCA7d+4c8WQAANCHi1ywZI5dyOKiiy7KN7/5zTznOc/Ju971Lhe4AABgxRBYLKlLL700l156aQaDQaanp0c9DgAAdOUUQQAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6GSowKqqc6rqd6rqc1X1QFVdsNiDAQAAjJthL9N+U5LdrbUfq6ozkjxnEWcCAAAYSwsGVlWdnWRLksuSpLX2eJLHF3csAACA8VOttWfeoOoVSXYl+WySlye5N8kVrbXD37LdTJKZJFm3bt0PvP/971+UgVkZDh06lOc+97mjHgMYc9YSoBfrCQu58MIL722tTS203TCBNZXkk0le2Vq7u6puSvL11tovfrvHTE1NtQMHDpzszJxGBoNBpqenRz0GMOasJUAv1hMWUlVDBdYwF7n4QpIvtNbunv/6d5L87VMZDgAAYCVaMLBaa3+W5PNV9eL5uy7Ok6cLAgAAcIJhryL4liTvnb+C4INJ3rh4IwEAAIynoQKrtfapJAuebwgAAHA6G+qDhgEAAFiYwAIAAOhEYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6ERgAQAAdCKwAAAAOhFYAAAAnQgsAACATgQWAABAJwILAACgE4EFAADQyapRD8B4qKpRjzCU1tqoRwAA4DTmCBZDaa11va1/+4e771NcAQAwagILAACgE4EFAADQicACAADoRGABAAB0IrAAAAA6EVgAAACdCCwAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6ERgAQAAdCKwAAAAOhFYAAAAnQgsAACATgQWAABAJwILAACgE4EFAADQicACAADoZNUwG1XVw0m+keRokiOttanFHAoAAGAcDRVY8y5srX110SYBAAAYc04RBAAA6GTYI1gtyUerqiX5d621Xd+6QVXNJJlJknXr1mUwGHQbkpXJawQ4VYcOHbKWAF1YT+hl2MB6VWvti1X115J8rKo+11rbe+IG89G1K0mmpqba9PR030lZWXbfEa8R4FQNBgNrCdCF9YRehjpFsLX2xfn//UqSDyU5fzGHAgAAGEcLBlZVra2qs479OclrknxmsQcDAAAYN8OcIrguyYeq6tj272ut7V7UqQAAAMbQgoHVWnswycuXYBYAAICx5jLtAAAAnQgsAACATgQWAABAJwILAACgE4EFAADQicACAADoRGABAAB0IrAAAAA6EVgAAACdCCwAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoJNVox6A/l5+3UfztUefGPUYC9pw5R2jHuEZnb1mde675jWjHgMAgDEisFagrz36RB6+4ZJRj/GMBoNBpqenRz3GM1ruAQgAwPLjFEEAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWACMnR07dmTNmjV59atfnTVr1mTHjh2jHgkAkvigYQDGzI4dO3Lttddm3759OXz4cNauXZvNmzcnSa6++uoRTwfA6c4RLADGyvXXX599+/Zl06ZNSZJNmzZl3759uf7660c8GQAILADGzNzc3PG4OmbTpk2Zm5sb0UTAOJuZmcnk5GQuvPDCTE5OZmZmZtQj
MeacIgjAWJmYmMj+/fufEln79+/PxITfGQInZ2ZmJu9+97uze/funHnmmXnssceybdu2JMmuXbtGPB3jyv8bATBWtm/fns2bN2f//v1JnoyrzZs3Z/v27SOeDBg3t956a3bv3p2tW7cmSbZu3Zrdu3fn1ltvHfFkjDNHsAAYK8cuZLFly5YcOXIkq1atyrXXXusCF8BJm5ubOx5Xx2zdutUpx5wSR7AAGDtXX311Hn300XzsYx/Lo48+Kq6AZ2ViYiJ79ux5yn179uxxyjGnxKsHAIDT0uWXX55t27Ydj6w9e/Zk27Ztufzyy0c8GePMKYIALJmqGvUIQ2mtjXoEYAkcu5DF6173uszNzWViYiJvetObXOCCU+IIFgBLprXW9bb+7R/uvk9xBaeXXbt25ejRo7nzzjtz9OhRccUpE1gAAACdCCwAAIBOBBYAAEAnAgsAgNPWzMxMJicnc+GFF2ZycjIzMzOjHokx5yqCAACclmZmZvLud787u3fvzplnnpnHHnss27ZtSxIXu+BZq8W4WtLU1FQ7cOBA9/0ynJe952WjHmHF+KN/+kejHgF4BhuuvCMP33DJqMcAxtTk5GQ+8pGPZOvWrRkMBpmens6ePXvyute9LkePHh31eCwzVXVva21qoe2GPoJVVZNJDiT5Ymvth05lOBbXNx64Ydn/g+PYIracbbjyjlGPAAAsorm5uWzduvUp923dujVzc3MjmoiV4GTeg3VFkgcWaxAAAFhKExMT2bNnz1Pu27NnTyYmXKaAZ2+oV09VvSDJJUn+/eKOAwAAS+Pyyy/Ptm3bjkfWnj17sm3btlx++eUjnoxxNuwpgr+W5BeSnPXtNqiqmSQzSbJu3boMBoNTHo5nb7n//R86dGjZz5gs/79HwM8p8Oy94Q1vyJe+9KW89rWvTWstVZVLLrkkb3jDG6wtPGsLBlZV/VCSr7TW7q2q6W+3XWttV5JdyZMXuVju769Z0Xbfsezf3zQO78Eah79HOO35OQVO0bE1ZCz+bcJYGOYUwVcm+eGqejjJ+5NcVFW/tahTAQAAjKEFA6u1dlVr7QWttQ1JfjzJx1tr/2TRJwMAABgzLpECAMBpa3Z2NqtXr85FF12U1atXZ3Z2dtQjMeaG/hysJGmtDZIMFmUSAABYQrOzs7nxxhszGAwyNzeXiYmJ4+/D2rlz52iHY2w5ggUAwGnppptuymAwyJYtW5IkW7ZsyWAwyE033TTiyRhnAgsAgNPS3Nzc8bg6ZsuWLZmbmxvRRKwEAgsAgNPSxMRE9u7d+5T79u7dm4kJ/0Tm2fPqAQDgtHTFFVdkenr6eGTt3bs309PTueKKK0Y8GePspC5yAQAAK8WxC1lcfPHFOXr0aCYnJ/PWt77VBS44JY5gAQBw2tq5c2eeeOKJfPzjH88TTzwhrjhlAgsAAKATgQUAANCJwAIAAOhEYAEAAHTiKoIAAIyVqhr1CENprY16BEbAESwAAMZKa637bf3bP9x9n5yeBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6GTVqAdgcWy48o5Rj7Cw3ct7xrPXrB71CAAAjBmBtQI9fMMlox5hQRuuvGMs5gQAgJPhFEEAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHTiMu0APK2XX/fRfO3RJ0Y9xoLG4XP/zl6zOvdd85pRjwHAEhBYDKWq+u/znd13mdZa/53Caeprjz6x7D+vbjAYZHp6etRjLGgcIhCAPpwiyFBaa11vd955Z/d9iisAAEZNYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6ERgAQAAdCKwAAAAOhFYAAAAnSwYWFX1V6rqnqq6r6rur6rrlmIwAACAcbNqiG0eS3JRa+1QVa1O8gdV9XuttU8u8mwAAABjZcHA
aq21JIfmv1w9f2uLORQAAMA4GuYIVqpqMsm9SV6U5F2ttbufZpuZJDNJsm7dugwGg45jstIcOnTIawTGwHL/OR2ntWRc5oTTmZ9TehgqsFprR5O8oqrOSfKhqnppa+0z37LNriS7kmRqaqpNT0/3npUVZDAYxGsElrnddyz7n9OxWUvG4O8STnt+TunkpK4i2Fr7iyR3Jtm2OOMAAACMr2GuInju/JGrVNWaJK9O8rnFHgwAAGDcDHOK4F9P8p7592FNJPlAa+3DizsWAADA+BnmKoKfTvL9SzALAADAWDup92ABAADw7QksAACATgQWAABAJwILAACgE4EFAADQyTCXaQcAgGfl5dd9NF979IlRjzGUDVfeMeoRntHZa1bnvmteM+oxWIDAAgBg0Xzt0Sfy8A2XjHqMBQ0Gg0xPT496jGe03AOQJzlFEAAAoBOBBQAA0InAAgAA6MR7sAB4Wme95Mq87D1XjnqMhb1n1AMs7KyXJMnyfw8KAKdOYAHwtL7xwA3L/o3p4/Cm9MQb0wFOJ04RBAAA6ERgAQAAdOIUQQC+rbE4tW338p/x7DWrRz0CAEtEYAHwtJb7+6+SJwNwHOYE4PThFEEAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWAAAAJ34oGEAlkxV9d/nO7vvMq21/jsF4LTgCBYAS6a11vV25513dt+nuALgVAgsAACATgQWAABAJwILAACgE4EFAADQicACAADoRGABAAB0IrAAAAA6EVgAAACdCCwAAIBOBBYAAEAnAgsAAKATgQUAANDJqlEPAADAynXWS67My95z5ajHGM57Rj3AMzvrJUlyyajHYAECCwCARfONB24Y9QgrxtlrVo96BIYgsAAAWDQP3zAeR1w2XHnH2MzK8uY9WAAAAJ0ILAAAgE4EFgAAQCcCCwAAoJMFA6uqXlhVd1bVZ6vq/qq6YikGAwAAGDfDXEXwSJK3tdb+sKrOSnJvVX2stfbZRZ4NAABgrCx4BKu19uXW2h/O//kbSR5I8l2LPRgAAMC4Oan3YFXVhiTfn+TuxRgGAABgnA39QcNV9dwktyf5F621rz/N92eSzCTJunXrMhgMes3ICnTo0CGvEeCUWUuAnqwn9FCttYU3qlqd5MNJ9rTWblxo+6mpqXbgwIEO47FSDQaDTE9Pj3oMYMxZS4BeNlx5Rx6+4ZJRj8EyVlX3ttamFtpumKsIVpJbkzwwTFwBAACcroZ5D9Yrk/xUkouq6lPzt9ct8lwAAABjZ8H3YLXW/iBJLcEsAAAAY+2kriIIAADAtyewAAAAOhFYAAAAnQz9OVjQw8GDB3PbbbflwQcfzCc+8YlcdtllWb9+/ajHAgCALhzBYskcPHgwN998c2ZnZ/PGN74xs7Ozufnmm3Pw4MFRjwYAAF0ILJbMbbfdluuuuy5r165NkqxduzbXXXddbrvtttEOBgAAnQgslszc3NzxuDpm7dq1mZubG9FEAADQl8BiyUxMTOTw4cNPue/w4cOZmPAyBABgZfAvW5bMZZddlmuuueZ4ZB0+fDjXXHNNLrvsstEOBgAAnbiKIEtm/fr1ectb3pKdO3cev4rgW97yFlcRBABgxRBYLKn169fnmmuuyWAwyPT09KjHAQCArpwiCAAA0InAAgAA6ERgAQAAdCKwAAAAOhFYAAAAnbiKIAAAY6WqFme/7+y7v9Za3x0yFhzBAgBgrLTWut/uvPPO7vvk9CSwAAAAOhFYAAAAnQgsAACATgQWAABAJwILAACgE4EFAADQicACAADoRGABAAB0IrAAAAA6EVgAAACdCCwAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6ERgAQAAdCKwAAAAOhFYAAAAnQgsAACATgQWAABAJwsGVlX9ZlV9pao+sxQDAQAAjKth
jmDdlmTbIs8BAAAw9hYMrNba3iR/vgSzAAAAjDXvwQIAAOhkVa8dVdVMkpkkWbduXQaDQa9dswIdOnTIawQ4ZdYSoBfrCb10C6zW2q4ku5JkamqqTU9P99o1K9BgMIjXCHCqrCVAL9YTenGKIAAAQCfDXKb9t5PcleTFVfWFqrp88ccCAAAYPwueItha+4mlGAQAAGDcOUUQAACgE4EFAADQicACAADoRGABAAB0IrAAAAA6EVgAAACdCCwAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6ERgAQAAdCKwAAAAOhFYAAAAnQgsAACATgQWAABAJwILAACgE4EFAADQicACAADoRGABAAB0IrAAAAA6EVgAAACdCCwAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADoRWAAAAJ0ILAAAgE4EFgAAQCcCCwAAoBOBBQAA0InAAgAA6ERgAQAAdCKwAAAAOhFYAAAAnQwVWFW1rar+uKr+tKquXOyhAAAAxtGCgVVVk0neleS1Sb43yU9U1fcu9mCsTDt27MiaNWvy6le/OmvWrMmOHTtGPRIwhmZnZ7N69epcdNFFWb16dWZnZ0c9EjCmbrnlljzvec/LJZdckuc973m55ZZbRj0SY27VENucn+RPW2sPJklVvT/JjyT57GIOxsqzY8eOXHvttdm3b18OHz6ctWvXZvPmzUmSq6++esTTAeNidnY2N954YwaDQebm5jIxMZHp6ekkyc6dO0c7HDBWbrnllrztbW/LPffck0ceeSTnnntuzj///CTJm9/85hFPx7ga5hTB70ry+RO+/sL8fXBSrr/++uzbty+bNm1KkmzatCn79u3L9ddfP+LJgHFy0003ZTAYZMuWLUmSLVu2ZDAY5KabbhrxZMC4ueqqq3LPPffkvPPOS5Kcd955ueeee3LVVVeNeDLG2TBHsIZSVTNJZpJk3bp1GQwGvXbNCnHkyJEcPnw4g8Eghw4dOv4aOXLkiNcLMLSjR49mbm7uL60lR48etZYAJ+Xxxx/PI4888pfWk8cff9x6wrM2TGB9MckLT/j6BfP3PUVrbVeSXUkyNTXVjp2uAcesWrUqa9euzaZNmzIYDDI9PZ39+/dn1apV8XoBhjU5OZmJiYnjR66mp6ezd+/eTE5OWkuAk3LGGWfk3HPPzXnnnXd8Pbn//vtzxhlnWE941oY5RXB/ku+pqo1VdUaSH0/ynxd3LFai7du3Z/Pmzdm/f3+SZP/+/dm8eXO2b98+4smAcXLFFVccj6ok2bt3b6anp3PFFVeMeDJg3LzjHe/I+eefn/vvvz9Jcv/99+f888/PO97xjhFPxjhb8AhWa+1IVf18kj1JJpP8Zmvt/kWfjBXn2IUstmzZkiNHjmTVqlW59tprXeACOCnHLmRx8cUX5+jRo5mcnMxb3/pWF7gATtqxC1m86lWvyuOPP54zzjgjv/qrv+oCF5ySaq113+nU1FQ7cOBA9/2ychw7DA9wKqwlQC/WExZSVfe21qYW2m6oDxoGAABgYQILAACgE4EFAADQicACAADoRGABAAB0IrAAAAA6EVgAAACdCCwAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOikWmv9d1r1SJKD3XfMSvL8JF8d9RDA2LOWAL1YT1jI+tbauQtttCiBBQupqgOttalRzwGMN2sJ0Iv1hF6cIggAANCJwAIAAOhEYDEqu0Y9ALAiWEuAXqwndOE9WAAAAJ04ggUAANCJwAIAAOhEYPGMqupoVX2qqj5TVR+squc8w7bXVtXsEs72j6rq/qqaqyqXVYVlbJmvJb9SVZ+rqk9X1Yeq6pyl+m8DJ2+Zryf/en4t+VRVfbSqvnOp/tssHwKLhTzaWntFa+2lSR5P8uZRD3SCzyT5h0n2jnoQYEHLeS35WJKX
tta+L8mfJLlqxPMAz2w5rye/0lr7vtbaK5J8OMkvjXoglp7A4mT8fpIXJUlV/fT8b2juq6r/+K0bVtWbqmr//PdvP/bbpfmjTp+Zv3/v/H3nVdU987/t+XRVfc8ww7TWHmit/XHH5wcsjeW2lny0tXZk/stPJnlBl2cJLIXltp58/YQv1yZxNbnT0KpRD8B4qKpVSV6bZHdVnZdke5LNrbWvVtV3PM1Dfre19u75x16f5PIkN+fJ3+Rsba198YTTcN6c5KbW2nur6owkk/OP+/0kZz3Nvmdba/+15/MDlsYYrCX/LMl/OrVnCSyF5bqeVNWOJD+d5GtJLuz0dBkjAouFrKmqT83/+feT3JrkZ5N8sLX21SRprf350zzupfOL1zlJnptkz/z9/z3JbVX1gSS/O3/fXUmurqoX5MnF73/O7/fvLcYTAkZi2a8lVXV1kiNJ3nuyTw5YUst6PWmtXT3/2KuS/HySa57Fc2SMCSwW8uj8ecTHVdUwj7styT9ord1XVZclmU6S1tqbq+rvJLkkyb1V9QOttfdV1d3z932kqn62tfZxR7BgRVnWa8n8vn8oycXNB0TCcres15MTvDfJRyKwTjsCi2fj40k+VFU3ttb+T1V9x9P8puisJF+uqtVJfjLJF5Okqv5ma+3uJHdX1WuTvLCqzk7yYGvt16vqu5N8X5KPO4IFK96yWEuqaluSX0jy91tr3+z7FIElslzWk+85drQryY8k+Vy/p8i4EFictNba/fPnF3+iqo4m+R9JLvuWzX4xyd1JHpn/32O/7fmV+TeKVpL/luS+JG9P8lNV9USSP0vyy8PMUVU/mifPnT43yR1V9anW2tZTeW7A0lkua0mSf5vkzCQfm/8t+Cdba8vpqmTAApbRenJDVb04yVySg1leVzhkiZQzIQAAAPpwmXYAAIBOBBYAAEAnAgsAAKATgQUAANCJwAIAAOhEYAEAAHQisAAAADr5fxsYz502uePaAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axs = hdf_filled.stratify(['Pclass']).cols['logFare'].boxplot(figsize=(12, 6))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### So, apparently we have some outliers, which are quite different depending on `Pclass`... let's clean them up!\n", "\n", "## 2.6 Enter `fence`!\n", "\n", "### Using `fence`, you can `cap` values to the lower and upper fence values, according to Tukey's method!" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Fare': [-26.0105, 64.4063]}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf_fenced = hdf_filled.fence(['Fare'])\n", "hdf_fenced.fences_" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Fare': [-26.0105, 64.4063],\n", " 'logFare': {'Pclass == \"1\"': [1.856486835561669, 6.122346420708724],\n", " 'Pclass == \"2\"': [1.6538880250316523, 4.281006170587935],\n", " 'Pclass == \"3\"': [1.2175936795640057, 3.7548204017120526]}}" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf_fenced = hdf_filled.stratify(['Pclass']).fence(['logFare'])\n", "hdf_fenced.fences_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### \"But I want to use those values to cap outliers in the test set as well...\"\n", "\n", "## 2.7 Enter the `transformers` generator (again!)\n", "\n", "### You can generate a custom PySpark fencer transformer that will perform the stratified fencing!" 
] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Fare': [-26.0105, 64.4063],\n", " 'logFare': {'Pclass == \"1\"': [1.856486835561669, 6.122346420708724],\n", " 'Pclass == \"2\"': [1.6538880250316523, 4.281006170587935],\n", " 'Pclass == \"3\"': [1.2175936795640057, 3.7548204017120526]}}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fencer = hdf_fenced.transformers.fencer()\n", "fencer.getDictValues()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How about using `pandas` to append new columns?\n", "\n", "## 2.8 Enter the `pandas` object from `HandySpark`!\n", "\n", "### Most column functions are available, like `isin`:" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Column(Embarked,)`'>" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "some_ports = hdf_fenced.pandas['Embarked'].isin(values=['C', 'Q'])\n", "some_ports" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The corresponding `pandas udf` is automatically generated and can be used directly in an `assign` expression:" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 True\n", "2 False\n", "3 False\n", "4 False\n", "Name: is_c_or_q, dtype: bool" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf_fenced = hdf_fenced.assign(is_c_or_q=some_ports)\n", "hdf_fenced.cols['is_c_or_q'][:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Even pandas objects like `str` and `dt` are available, with lots of already supported methods, like `find`:" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameis_mrs
0Braund, Mr. Owen HarrisFalse
1Cumings, Mrs. John Bradley (Florence Briggs Th...True
2Heikkinen, Miss. LainaFalse
3Futrelle, Mrs. Jacques Heath (Lily May Peel)True
4Allen, Mr. William HenryFalse
\n", "
" ], "text/plain": [ " Name is_mrs\n", "0 Braund, Mr. Owen Harris False\n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... True\n", "2 Heikkinen, Miss. Laina False\n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) True\n", "4 Allen, Mr. William Henry False" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "col_mrs = hdf_fenced.pandas['Name'].str.find(sub='Mrs.')\n", "hdf_fenced.assign(is_mrs=col_mrs > 0).cols[['Name', 'is_mrs']][:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2.9 Evaluation\n", "\n", "### So, you pre-processed your data and trained your classification model... how good is it?" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8464432940274191" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from pyspark.ml.feature import VectorAssembler\n", "from pyspark.ml.classification import RandomForestClassifier\n", "from pyspark.ml.pipeline import Pipeline\n", "from pyspark.ml.evaluation import BinaryClassificationEvaluator\n", "\n", "assem = VectorAssembler(inputCols=['Fare', 'Pclass', 'Age'], outputCol='features')\n", "rf = RandomForestClassifier(featuresCol='features', labelCol='Survived', numTrees=20)\n", "pipeline = Pipeline(stages=[assem, rf])\n", "model = pipeline.fit(hdf_fenced)\n", "\n", "predictions = model.transform(hdf_fenced)\n", "\n", "evaluator = BinaryClassificationEvaluator(labelCol='Survived')\n", "evaluator.evaluate(predictions)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### This is the Area under the ROC Curve, fine... but what about the Precision-Recall curve? Thresholds? Confusion Matrix?\n", "\n", "## Enter the (extended) BinaryClassificationMetrics!" 
] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "bcm = BinaryClassificationMetrics(predictions, scoreCol='probability', labelCol='Survived')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### You can PLOT both ROC and PR curves!" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtgAAAEWCAYAAABc2eBcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3XeYVOX1wPHv2c7CsgsssMDSu4BIE8Go2LGgxG6MIBiN5mdLNJbEJBo1lthDNBoLYNdYQMWGKCqKgIiC9N7LUrfX8/vjvesOy5Zh2dk7O3s+zzPP3DZzz8zOvvfMO28RVcUYY4wxxhhTO6L8DsAYY4wxxphIYgm2McYYY4wxtcgSbGOMMcYYY2qRJdjGGGOMMcbUIkuwjTHGGGOMqUWWYBtjjDHGGFOLLMEOcyJyiYh87Hcc4UREskSkiw/n7SQiKiIxdX3uUBCRn0RkRA0eZ59JYxqIYMoJEenglcvRdRRWyInIWhE5yVu+Q0Re9DsmU79Ygn0QvH+4XK8g2SoiE0WkSSjPqaovqeopoTxHIBEZLiIzRCRTRPaKyLsiclhdnb+CeD4Xkd8EblPVJqq6OkTn6yEib4hIhvf6fxSRP4TbhcNL9LsdynOoah9V/bya8xzwpeJQPpMicqSITBORPSKyS0TmiMi4mjyXMQ1ZuevRtlBdj4IpJ1R1vVcuF9f2+b3kttB7nXtE5GsRGVbb5zkUItJURB4VkfVenKu89VS/YzP+sQT74I1S1SbAEcAA4Daf46mRimphvULrY2AK0BboDPwAzApFjXG41QSLSFfgW2AD0E9Vk4HzgcFAUi2fy7fX7te5vc/XDGAm0A1oAVwNnFbD5wurLz3G+KD0ejQQV07dXv4Acer7tf4173WmAp8Bb/gcz89EJA74FOgDjASaAsOAncCRNXi+sLoumkOgqnYL8gasBU4KWH8AeD9gPR54EFgPbAP+AzQK2H82sADYB6wCRnrbk4FngS3AJuBuINrbdxnwlbf8JPBguZimAH/wltsCbwI7gDXAdQHH3QH8D3jRO/9vKnh9XwJPVLD9A2CytzwC2Aj8Ccjw3pNLgnkPAh57C7AVeAFoBrznxbzbW073jr8HKAbygCxggrddgW7e8kTg38D7QCYuQe4aEM8pwDJgL/AELrk74LV7x74Y+PesYH8n79xjvdeXAfw5YP+RwDfAHu9vOQGIC9ivwP8BK4A13rbHcAn9PuA74JiA46O993mV99q+A9oDX3jPle29Lxd6x5+J+3ztAb4GDi/32b0F+BHIB2II+Dx7sc/z4tgGPOxtX++dK8u7DSPgM+kd0wf4BNjlPfZPlbx/XwH/ruL93e95K/lbPwlM81576ecoOuD4XwI/estRwK3e+7cTeB1o7nc5Yje71caNA69H/wTe85Y/x5Wfs4Bc3BfaSq8z3mOuAJZ4Zc1iYGD581RRTnTy/ldjvPW2wFSvTFgJXBFwnju8/
8XJ3rl+AgZX8TrvAF4MWD/MO1fLgG1VlX3tgbdw15idlF1HuuK+8O/EleUvASkVvb/lYygX32+896JJFa/h53LMW58I3O0tj+DA6+IS4MyA42O8+Ev/Jkd5r3MPrhJshN+fR7sdeKvv32p9IyLpuJq3lQGb7wN64Gq3uwHtgL96xx+JK1D+CKQAx+L+gcH9sxV5jxmASwr3axbheQW4UETEe85m3rGvejUU7+L+2doBJwI3iMipAY8/G5dkp+AKk8DXkwgMp+KagdeBkwPW03A1Ce1wyebTItKzuvcg4LHNgY7Albgk6HlvvQPuYjABQFX/jEv6r1H38+M1FcQGcBFwJy5ZX4m7sOD9PPc/3K8MLXCJ9vBKngPgJO/46vwC6Il7j/8qIr297cXA73HvzTBv/+/KPXY0MBR3kQCYi3uvmgMvA2+ISIK37w/AxcDpuFqR8UCOqh7r7e/vvS+vicgA4Dngt95rfQqYKiLxAee+GDgDdxEpKhfXY8BjqtoUd+F53dteeq4U71zfBD5IRJKA6cCHuItqN1xtDuWOS/Tek2De36r8Cvf3TfJizgZOKLf/ZW/5Wtz7fZwX227clzFjIoqItMeVE98HbL4UV8YmAeuo4jojIufjksgxuLLmLFziWV5l5UR5r+KSxrbAecA/RCTw//Qs75gUXCI+IcjXGefFuBP3/0xVZZ/3K9d73uvvhLsevVr6dMC9Xoy9cYn4HcHEUc5JwIeqmlWDx5Yqf118BVdelzoVyFDV+SLSDlehdLf3mJuAN0Wk5SGc34SC3xl+fbrhEuIs3LduxSUSKd4+wV3sA2tPh1FWU/kU8EgFz9kaV6MYWNN9MfCZt3wZZTXYgqtRPNZbvwKY4S0PBdaXe+7bgOe95TuAL6p4benea+pVwb6RQKG3PAJXSDcO2P868Jcg3oMRQAGQUEUcRwC7A9Y/p1yNMwfWaj4TsO90YKm3PAb4JmCf4GqLK6vBLsT7VaGS/Z28c6cHbJsDXFTJ8TcAb5eL+4RqPmO7cYkzuC8EZ1dyXPkakSeBu8odsww4LuCzO76Cz3NpDc0XuC8pqZW85piAbYGfyYuB74P432lX2eerouet4m89udz+u4HnvOUk7/PX0VtfApwYcGwb728cU128drNbuN8oux7twSWQT1D2a+HnwN8Djq3uOvMRcH0V5wm6nMAlqsVAUsD+e4GJ3vIdwPSAfYcBuVW8zjtw14093vPuJKDGtqqyD3f92RHM/zzuy/j3lbzuO6i8BvsT4L5qnru6Guz9rou4L0GZQKK3/hLwV2/5FuCFcs//ETDW78+k3fa/WQ32wRutqkm4f4peuNpKgJZAIvCd1xFjD65Wr/RbZXvcT9XldQRigS0Bj3sKaFX+QHX/Sa9S9s32V5TVRHcE2pY+h/c8f8IVrKU2VPG6dgMluCSkvDa4n9B+PlZVswPW1+FqAap7DwB2qGpe6YqIJIrIUyKyTkT24QrwlINsX7s1YDkHKO3o05aA1+y9fxureJ6dVPz6gzqf10HyPa8D7D7gH5R9Pkrt9zcQkZtEZInXoXIP7mfc0sdU9pmpSEfgxnJ///a496DCc5dzOe6Xh6UiMldEzgzyvMHGWNXn62CUfw0vA+d4NfXnAPNVdZ23ryPwdsD7sQR3gW6NMZFhtKqmqGpHVf2dquYG7Av8X6nuOhPs/3Ew5URbYJeqZgZsW4f7kl2qfBmaICIx4kYoyvJuHwQc87qqpuD+dxcBg8q9tsrKvvbAOj3wFztEpLWIvCoim7zy+kUOLK+DEex1oyr7XRdVdSWuvBrl/fp3FmW/zHUEzi/3en9RCzGYWmYJdg2p6kzct9AHvU0ZuOYNfbwCL0VVk9V1zABX2HWt4Kk24GoWUgMe11RV+1Ry6leA80SkI67W+s2A51kT8BwpqpqkqqcHhl3F68nGtR8+v4LdF7D/z/7NRKRxwHoHYHMQ70FFMdyIa24xVN3PjqVNEqS6mIOwBVcz757QNa1Jr
/xwpgPnHsL5ngSWAt291/Inyl5HqZ9fj4gcA9yMe3+beReQvQGPqewzU5ENwD3l/v6JqvpKRecuT1VXqOrFuAvu/cD/vL9xde//BqDaDrCqmoP7fFX1/mbjvqABICJpFT1VueddjLt4n8b+zUNKYzut3HuSoKqbqovXmAgQ+L9S3XUmqLKminIi0Gagudd8rFQHXLvv6p7/JXVN0Zqo6gGdn1U1A9eE4g4RKU0oqyr7NgAdKuk4+A/ce9TPK69/zYHldTCmA6dW8D4EyiGgbMM1CQlUUTlb2kzkbGCxl3SDe00vlHu9jVX1vhrEbkLIEuxD8yhwsoj0V9US4L/AIyLSCkBE2gW0gX4WGCciJ4pIlLevl6puwY3c8ZA31E+UiHQVkeMqOqGqfo9LZJ8BPlLVPd6uOUCmiNwiIo1EJFpE+orIkIN4PbcCY0XkOhFJEpFmInI37me2O8sde6eIxHlJ4pnAG0G8BxVJwiXle0SkOfC3cvu3EUQCV4n3gX4iMtorYP+PAwu2QH8DhovIP0uTOxHpJiIvikhKEOdLwnX+yRKRXrgRMqo7vgjvJ0wR+Suu/WOpZ4C7RKS7OIeLSAtvX/n35b/AVSIy1Du2sYicUe4iVykR+bWItPT+hqWfqRIvthIq/xu8B7QRkRu8No9JIjK0kmNvBi4TkT+Wvg4R6S8ipW0ifwD6iMgR4tqh3xFM7Lik+nrcl7PAPgT/Ae7xvowiIi1F5Owgn9OYiBHEdeYZ4CYRGeSVH91K/28CVVFOBJ5rA64D3r0ikiAih+NqvmtlHGlVXYZrEnGzt6mqsm8OrqLlPm97gogc7T0uCdfEZq+4ds1/rGFIL+CS3jdFpJf33rYQkT+JSGkF1wLgV951eSSu+Up1XsW1k7+a/SsOXsTVbJ/qPV+CiIwQ1y/MhBFLsA+Bqu7AdVws7cR3C66T3WzvJ6fpuNpZVHUOMA54BFdLORP3Uw+4tsJxuJ7bu3Edwar6uedlXMeKn//p1I0/eiauDfMaypLw5IN4PV/hOlOcgyuU1uE6w/xCVVcEHLrVi3MzronKVaq6tLr3oBKPAo28eGfjmpQEegxXY79bRB4P9rV4rycDVyP/AO5nvMNwPeDzKzl+Fe7LRCfgJxHZi/uFYB6uPVx1bsLVombiCv3Xqjn+I9zrXY57r/PY/2fdh3Ht2z/GJe7P4t4rcMnnJO8nwgtUdR6uTf4E3N9mJa5Nc7BG4l5zFu49v0hVc72a53twQzXuEZGjAh/k/Qx8MjAK97lYARxf0QlU9Wtch8QTgNUisgt4GjcqCKq6HPg77jOzAjfqSDBewV2wZnh/81KP4TpQfSwimbjPV2XJvzGRrtLrjKq+gfs/fxlXfr2D60BXXoXlRAXHXYwrRzcDbwN/U9Xptfha/glcKSKtqir7vOviKFyb5vW4JoIXes9xJ254w724ypi3ahKIqubjrsdLce2x9+ES+1TcqFbgKgBG4b6UXIJ7f6t73i24X/2GE3At8b7AnI37hXQH7prxRyyfCzvimqUaExxxM3q9qKr17tuyuJFWNuKGFfzM73iMMcYYE5nsG4+JaN7PaCniOsGVtome7XNYxhhjjIlglmCbSDcM1zs+A/cT3ehKftI0xhhjjKkV1kTEGGOMMcaYWmQ12MYYY4wxxtSiisaGDGupqanaqVMnv8MwxpiD9t1332WoaoOa0tjKbGNMfVbTcrveJdidOnVi3rx5fodhjDEHTUTWVX9UZLEy2xhTn9W03LYmIsYYY4wxxtQiS7CNMcYYY4ypRZZgG2OMMcYYU4sswTbGGGOMMaYWWYJtjDHGGGNMLbIE2xhjjDHGmFoUsgRbRJ4Tke0isqiS/SIij4vIShH5UUQGhioWY4wx1bNy2xhjakcox8GeCEwAJley/zSgu3cbCjzp3RtjTHhShcIsyN0JebvcraTQ76hq00Ss3DbGmEMWsgRbVb8QkU5VHHI2MFlVFZgtIiki0kZVt4QqJmNMGCnIh
OKCg39cUR5kbYKsjZDp3bI2uu21raQI8vdA3s6ypLoGCfV3G9tw1yfH1X58tSzU5faGDbBjh/ueEgotWoBNGmmMCQd+zuTYDtgQsL7R23ZAQS0iVwJXAnTo0KFOgjPG1KKcHbD0FSjMcevb5sLKKaDF/sZVEzGJ0KgFJDR3t+j4Sg8tLBLuebMdd7/ZjuISqcMgQyaocruyMrugABYsgJKS2g8sPx8aN4axY0Ei4q02xtRn9WKqdFV9GngaYPDgwSGq+zDGHDQtgaWvwXcPQd7uyo/L3gpFOQduj2sKUQdZDEXHQeO2kJQOTdK9+3YQ1+TgnicoUZDQzCXSpUl1TEJQj/zpp+2MGfMO8+e73PP664fy2GMhCDEMVVZmp6dD27bQsmXtn3PdOti1q/af1xhjasLPBHsT0D5gPd3bZowJV6unwZIXy37j37UEdvwQ3GM7ngKtBpStdzkd0o+t/Rh9VlxcwsMPf8Ptt39GQUExHTsmM3HiaEaM6BQJCXaDKrczMqpO2ps3h9TUuovHGFN/+JlgTwWuEZFXcZ1k9lr7a2PC3Je3QsbC/bc1aQvD7oQOx1f+uJhG7rgG4JNPVnPzzdMBuOKKgTz00CkkJVXejKSeiZhyu7LkuSCgW8DOnZCdDU2aQG6uu0VFQVGRe3xsLAwfDj171l3cxpj6IWQJtoi8AowAUkVkI/A3IBZAVf8DTANOB1YCOcC4UMVijKnCjh9hxrWunXRVtBh2L3fLR98NyV1cc4lOp0Bs49DHWU+MHNmNa689ktNO68Zpp3X3O5yDEknldlW1z0VFrsNlfr5LmEVg9263XlzstoH7oSYuzi2LuEQ7Ntat5+VBVhY0bRr612KMqX9COYrIxdXsV+D/QnV+YyKSqhvZIlD2ZljyCuxeWrPn/GlS8MfGNIIjb4Ohf7KeZJ716/dy9dXvc++9J3L44a0BePzx03yOqmbqe7mdnw/Lve+AmZmuxrm4GPbtcwl0UZH72Kq65DgtDZo1c0l0kyaQmOj2B/PRTkhwz7lqlTtXcbF7rrS00L5GY0z9UC86ORrTIGkJbPwCdi4u2/ZpCHObi7+G+OSqj2ncFhJSQhdDPaKqTJr0A9df/yH79uVTUFDMJ59c6ndYDVZysqu1XrXKDQVYWFhW49ykCXTrBvEBLXUO9fthcrJLsFevducsbUry61+X1YAbYxouS7CNCaX8fWXNKvbbvgc+ux6i4ip/bM42yK6keatEl2UI0QnQ+XToePLBj8hRqmV/aD2g+uMMAFu3ZnHlle/y7rvubzt6dC+eeupMn6Nq2FJSoEcPV5PcpYurjQ71+Q4/3LXZjo6GjRthzx6X5O/ZYx0gjWnoLME2JlS0BF4cBHtW1vw5mnb0EufYsm3tj4ee5x96fKZGXn/9J66++n127colOTmef/3rNH7968MRazLju6Skuj1fVJRrKlK6nJ8P333nxvlu3br6BDuwnbgl5MZEFkuwjakpVdi+ALZ/D1QwPHv2FpdcxzaG5r0O3F9cAD0vgM5nVPz8MfHucWK/N4eL7duzGT9+CtnZhZxySleeffYs0tOtl5txTUZ27oS1a8vaZJeqrMNlZqZrWpKdDQsXwhlnlCXsxpj6zRJsY4KVuclNlZ2zDVa9Cyvfgcz11T+u73g44fHQx2dCrlWrxkyYcDp5eUX89reDrNba/CwlBQYOdMtLlrgOlj/95Nazsly7cBGXaAfOZNmoEWzf7tqML13qmrZYbbYx9Z8l2MYEKsyFeQ9Cbrkh6zI3wsq3Dzy+cRvXZKOy2f1iEuHIW2s/TlMn9u3L5/e//5BBg9ryu98NAeCyy47wOSoT7pKTYcsWmDXLtc8GiImBNm2gQ4cDa6lbtIA1a1zyvW2b6zxpCbYx9Zsl2KbhKciENR9Ccf7+24sL4OPLK39cdBw06+E6FXY8CbqNhrQh1oQjQs2YsYZx46awfv1e3nlnGWPH9qdx4yo6pRrjadvWtcEuHWO7Oikp0LWr6
zC5bp1Lzps1s5psY+ozS7BNw7JlDrx/EexdU/VxLfrA4VeWrUfFuJE6kjuFNDzjv5ycQm69dTr/+tccAAYPbsvkyaMtuTYHpbTmOlilE9aUlLgRST77zNVsn3de7cdmjAk9S7BNZNu7FlZNhX3rYdtcN640uAS6VQU/9af2c4l1QrM6DdOEh2++2cDYse+wYsUuYmKi+Otfj+XWW39BbOxBZkvG1FCrVi7ZXuwNf79jB7Rs6W9MxpiDZwm2iQxb5wXUSqubnGXlO7Djh/2Pi0mA/v8Hv7jHjdJhjEdVueGGj1ixYhd9+7Zi8uTRDBjQxu+wTAOUkOAS7bVr4bXX4MgjXTMSazJiTP1hCbap//auhZeGVLwvtolr2tF6EDRpC13PgngbVs2UUVVEBBHhuefO4sUXf+SOO0YQH2/Fo/FPerob6m/zZtcuOyPDzRR5zDGHPgulMSb07Api6r8Nn7n7+BTX+RCgUUvoOgran2A11aZChYXF3HffVyxenMHLL5+DiNCnTyvuvfckv0MzBnCjkeTkQFERbNrktg0f7kYkMcaEN/s3NfVXSTHMuBZ+eNKttx4Io97wNyZTLyxevIOxY99h3rzNANx44zAGD27rc1TG7C8lxd3A1WJXNFmNMSY8WYJt6idVmH4VLHzGrbc/Hk74t78xmbBXXFzCo4/O5s9/nkF+fjEdOiTz/PNnW3Jt6oX8fPjxR1eD3aoVpKW57TblujHhxxJsU//sWAhf3gJrPnCdFs96GzqP9DsqE+ZWrdrFuHFT+PJLN/vm5ZcP4OGHT6VpU2tCZMJf6VTs8+e7ZiOtW8OAAW5fZiasXu0S7cREGDas7HGWcBvjD0uwTXhThYyFsPkb2PGjGxVkyzegJRCXBGe8asm1CcpTT33Hl1+uJy2tCf/97yjOPLOH3yEZE7TAqdi3boUNG+DLL8v2JyRA+/auU+Tcua4jZH4+9O5tCbYxfrAE24SXkmLYPAsWPQcFWbBrKez8af9jomKg/+9g2F8gsZU/cZp6oaREiYpyQy7ceecISkqU2277BS1aJPocmTE1l5ZW1jykvKQkdx8XB4sWuSTbGFP3LME24WPvGvjfKbBn5f7bE1q4WuqWR0DL/tBqACRalYypnKrywgs/8tBD3/DFF5eRnJxAo0axPPjgKX6HZkxINWlSthwVtf++0rbaqm6WSKvZNiZ0LME24SF72/7J9ZG3urGr41Mg/ViItmmqTXC2bcviqqve5513lgIwadIPXHfdUJ+jMsYfubnw009QUABZWbBqFWRnu06S/fq55aQk18Z7715rs21MbbEE2/hHFfaudtOXf/eIS65bDYQLPrPJYEyNvPnmYq666n0yMnJo2jSexx8fyZgx/f0OyxhfNG3qOkZu3eoS7ZgYaNTITWKTkQGzZsGePa4o7tcPCgshLw/OO8/vyI2p/yzBNnVjzypY+zGUFEBxIWz7DjZ9AVmby45J6QbnfmDJtTlou3blcu21H/DyywsBOOmkLjz33Fm0b5/sc2TG+KdTJ1dzDRAfv/8MkKmpLuHOyoL1612nyb17y8bdNsYcGkuwTWgtfxNm/QV2Lal4f0IL1wQk/Vjo/WtrW21qZPbsjbz88kISE2P55z9P5qqrBv/cudGYhioqyo0uUpHS7eUns9m9u+yYFSvcfffuoYvRmEhlCbYJjYJMmPVXmP9o2baeF7lRP0SgeS+XVDfvvX+1ijFBKioqISbG9eI6/fTu/POfJzN6dC+6dWvuc2TG1F/5+W70kdxc2LEDtmyBfftg0CC/IzOmfrEE29S+Ld/Cy0eVrTdpC7/6FpLS/YvJRJTPP1/Lb34zlZdeOoehQ93n6qabhvsclTH1W3KyS6i/+QaKitxt7143eU16upvcxhgTHEuwTe3K3QWvjyhb73o2nP0WSFSlDzEmWDk5hfzpT5/y2GPfAvDoo9/yyiv2xc2Y2pCSAkOGuB8VS4f4W70a1q51t+hoG2HEmGBZgm0O3epps
Ppdt5y5AYry3PKpz0Hfcf7FZSLK7NkbGTv2HZYv30lMTBS3334Mf/rTMX6HZUxEiY7ef715c1eLvXWr6zD5i19Yqz5jgmEJtqmZ/L3wynDYtQy0+MD9Jz5hybWpFfn5Rfz97zO5775ZlJQohx3WksmTRzNoUFu/QzMm4qWkQM+esHw5bN8OQ4e6WSKhbOKaith42qahswTbHJziQtizAuY+CDsX77/vhH+BRLth9npc4E98JuLs3JnLk0/OQ1W5+ebh3Hnn8SQkWNFlTF1JTIRmzVwt9vLlLsEuKoI1a9y42Y0b7398drabUbJ5c2jZ0g0XaExDY1cpE5xFz8O2+bD8dcjZXrZ95ETofYlLrO13Q1NLiopKEIHo6Cjatk1i4sTRtGjRiKOP7uB3aMY0SMnJrrZ66VJX1OfnuxrsVq0gNnb/YwsLXfK9ZInrHGkJtmmILME2B9rxo7stfdnNsliYvf9+iYZmPaBJG+hyJkTZx8jUnqVLMxg79h3OPbc3N998NABnndXT56iMadhSUqBrV9cOOzHRbeva1U1WU9GxqrBwoTu2sPDAJLwyNva2iRSWGZn9LXkFpl0CaMX7T34ael5osy2aWldSojz++Lfcdtun5OUVsWtXLjfccBRxcdHVP9gYE3JND6LYF3GT2Sxb5mq/U1OhpARatHC13qVK23GruuR9927YtMk1SbE23KY+swTblMnaAh+OBRQ6nwaN20L/q9ykMABxTXwNz0SuNWt2M27cFGbOXAfAuHFH8Mgjp1pybUw91qqVa4+9YYOrmS4dU/vYY93+zEyXUO/b56Zsz8uDnBzXxnvhQjj++OrPUZqgFxa6JL1PH2utaMJDSBNsERkJPAZEA8+o6n3l9ncAJgEp3jG3quq0UMZkKpG7C+Y9BCWF0PII+OX7VkqZkFNV/vvf+dx448dkZRXQunVjnn56lDUJ8YmV2aY2paS4hDcnx9Vm5+XBqlWwYIHrJLlvnzuuWTPXTruJV4ezaJGbtn3x4rLa7vKjlezb59qBZ2fDzp1uhJPoaPccTZq4420kE+OnkCXYIhIN/Bs4GdgIzBWRqaoaOPTE7cDrqvqkiBwGTAM6hSomU4FZf3EzL26aBUU5blvbYZZcmzpRUqJMnvwDWVkFnH/+YTzxxBmkpib6HVaDZGW2CYW4uLJh/RISXNvqkhK3HBNTcRvutDRX4/3NN2UjkWRnu9rvjRtdU5Ic73IVGwtJSdCjhxvhZOZMl6zv2eOexxJs45dQ1mAfCaxU1dUAIvIqcDYQWFgrUNqqKxnYHMJ4THk522H23WXrnU6Fgde7e2NCRFXJzS0iMTGW6OgoJk4czbx5m7noor5+h9bQWZltQi6Ydtypqa72e9EiVxO9bRs0auS2N2/u2nRHRR04KY6Im3ly+XLYscMl5c2ale23Gm1Tl0KZYLcDNgSsbwSGljvmDuBjEbkWaAycVNETiciVwJUAHTrYMF2HLG83bP4aFj5btm38cmhm3bZNaG3fns3VV79PdnYBH3xwCSJCt27N6datud+hGSuzTRiJiYEjjnBNSaKDHAW2ZUt3AzdEYG4uzJnj2mbn5bkk/YQToK3NUWXqQJTP578YmKiq6cDpwAsickBMqvrhCepFAAAgAElEQVS0qg5W1cEtS/97TM3sWQXP94a3z4SVb7ttXUZZcm1C7u23l9C37xO89dYSvv56A8uX7/Q7JHPwrMw2dSompmYtFks/dnFx0LmzG71k0yb4/HPXRMWYUAtlDfYmoH3Aerq3LdDlwEgAVf1GRBKAVGA7pvZlb4P/nQI526B5b2gz1I0SkjbE78hMBNu9O5frrvuQF1/8EYDjj+/E88+fTceOKf4GZsqzMttEjNTU/ZuDdOzoarJ373b3xoRaKBPsuUB3EemMK6QvAn5V7pj1wInARBHpDSQAO0IYU8P27T2wdzW0HgwXzIC4JL8jMhHuo49WcvnlU9m0KZNGjWK4//6T+L//O5KoKOtEG4asz
DYRTcQ1G1m3Drp08TsaE+lClmCrapGIXAN8hBvO6TlV/UlE/g7MU9WpwI3Af0Xk97jOM5ep2nfLQ1JcAEV5UJQLPz4NGYugpMDNzLh3tTum10WWXJs6MWvWBjZtyuSoo9KZNGk0PXq08DskUwkrs02kS052o5B89BGceKJ1ejShFdJxsL3xUaeV2/bXgOXFwNGhjKFBWfsJvH+h68RYkZhEaDscup9Tt3GZBmXfvnyaNo0H4Pbbj6V9+6aMHz+A6Gi/u3yY6liZbSJZSorr4LhtG3z/vRv2b8gQN5yfMbXNZnKMFKvfdx0XS8UluaYgfcdBdDw06wmpfSDK/uQmNHJzC7n99hm88soifvzxalJTE4mLi+aKKwb5HZoxxgBu2L59+9wMkHl5bjxuS7BNKFi2Vd+pwpe3wdz73XpcEly5EeKDGGzUmFoyd+4mxox5h6VLM4iOFj77bA3nn9/H77CMMWY/KSnuBmXjbP/4IxQXuxrtHj1snjVTOyzBru82zXLJdVQMHPVXGHidJdemzhQUFHPXXTO5996vKC5WevVKZfLk0QwZ0s7v0IwxpkqtWsHKla5GOzvb1Vfl57tmJNY22xwqS7Dru7kPuPsjb4Vhf/E3FtOgLFq0nUsvfZsFC7YiAjfeOIy77jqeRo1i/Q7NGGOq1aqVu4GbWn3FCtc2e8UK6NfPbbeOkKamLMGuL0qK4LtHYfeysm1Zm2DNBxAdBwOu9S820yBt357NggVb6dKlGRMnns0xx3T0OyRjjKmRlBQ4/HA3xfqmTa5GOyvLJeBHHw1t2lT+2IwM19QEICnJHVu6zRL0hssS7Prim7tg9t8r3tdmGCS2qtt4TIO0c2cOLVokAnDCCZ157bXzOP307jRpEudzZMYYc2ji4yE93SXIqrB1q2tCEhPjxs2Oj3ftswsK3AyRqpCZ6Wq/9+51SbUIDBvmkvNt26y5SUNmCXZ9UJAJq6a65W6/hM6nBewU6HSqL2GZhqOkRJkwYQ633fYp7713Mccf3xmACy6wjozGmMgSHe3u09MhKgrWrnUJdGmCXVzsthcVufbbIq4GvHVrV/s9e7Z7fOlIJeVHik9Nha5d6/QlGR8ElWCLSBzQQVVXhjgeU96mr2Har2DfOkBg+J3Qsp/fUZkGZO3aPYwbN4XPP18LwEcfrfo5wTbGmEjWtq275eSUDeuXkwOxsS7hjo0tS8gB2rcvW96yBTZvhvnzXY03uMdGRcHJJ1vzkUhXbYItImcADwNxQGcROQL4m6r+MtTBNXg/TYKPLgcthlYD4YR/WXJt6oyq8txz3/P7339EZmYBLVsm8tRTZ/LLX/b2OzRjjKlTiYnuVrocjDZtDmy7XdqZcsYMNyZ3nz6uhjuwljs11SX1pn4Lpgb778BQ4DMAVV0gIt1CGlVDpwoLn4HpV4GWwKAb4Zh/uM6MxtSBbduyGD9+KtOmrQDgnHN685//nEHLlo19jswYY+qvlBQ3Qsn27a6Ge/bssuQ6KsoNE5iUBJde6m+c5tAFk2AXquoe2X/kda3sYFMLZt4E3z3slo+6HY6+y994TIMTHR3FvHmbSUlJ4N//Pp2LL+6L2OwLxhhzyBISoEMHdytv3bqyEUlM/RZMgr1ERC4AokSkM3AdMDu0YTVwW7519+nHwfBKRg4xppZlZOSQnBxPbGw0qamJvPXWBXTu3Iy2bZP8Ds0YYxqM/Hw3y2RUlGvfXb5uw9pu1w9RQRxzDTAIKAHeAvKB60MZVINWXADbv3PLo/5nc7aaOjF16jL69HmCu+/+4udtRx/dwZJrY4ypQ8nJbljAb76BWbPgiy/c/aJFbsjAhQvdZDgm/AVTg32qqt4C3FK6QUTOwSXbpjaVFMO3/4CiPGjWExLtK6oJrT178rjhhg+ZNOkHAL7+eiPFxSVERwfz3dsYY0xtSkmBwYP335aX58beBjf039atrnlJixZWkx3Ogkmwb+fAZPrPFWwzhyJrM3xwKayf4dYH3eBvP
CbiTZ++mnHjprBx4z4SEmK4774TufbaoURF2a8mxhgTLhIS3A2gXTs3CsmcOW4Ukh493PbiYjdCSbNmrmmJNSPxX6UJtoicCowE2onIwwG7muKai5jasuo9+PAyyNsJjVrCaZPKTSZjTO0pKCjm97//kCeemAfA0KHtmDRpND17WmlsjDHhrHQUkm3b3CgkmZlue36+u8XFuRrvRo2gf39IS6t6mncTOlXVYG8HFgF5wE8B2zOBW0MZVINRUgQz/wjzH3XrHU+G0yZD4zR/4zIRLTY2ihUrdhEbG8Udd4zg5puPJibGmoQYY0x9kJAAHTu6sbIDx8+OiYGSEti9G9asce23W7WCvn3dhDhNm9r42nWp0gRbVb8HvheRl1Q1rw5jajhm3gTzH4OoGDj6HhhyE4glOqb25eUVsWdPHmlpTRARnnvubHbuzKF/f/syZ4wx9VFs7IHboqKgZUt327EDli93nSLz892+iy6CxjadQZ0Ipg12OxG5BzgMSCjdqKo9QhZVQ7BvAyz4t0uoz5sO7Y/zOyIToebN28yYMW+TmprIZ5+NJTo6ivT0pqSnN/U7NGOMMSHSsqXrCJmfD+vXQ3a2q+E2dSOYBHsicDfwIHAaMA6baKZmVGH1e5DxE2z60jUR6XmRJdcmJAoLi7n77i+4554vKS5WevRowdatWbRrZ4m1McY0BFFRrj12QkLZNO1Nmux/jHWIDI1gEuxEVf1IRB5U1VXA7SIyD/hLiGOLPHPuh69u23/bkD/6E4uJaIsWbWfMmLf5/vutANxww1DuuedEEhMr+E3RGGNMREtOhp07YfVq17SkpMRNYpOd7ZLw7t3d0H+WaNeeYBLsfBGJAlaJyFXAJsBmnzhYPz7jJdcC/a+GuCRo2R9aD/Q7MhNhHn10NrfcMp2CgmI6dUrh+efPZsSITn6HZYwxxicpKW5Iv9xcV5td2jmypMSNRrJwoUu+R42C+Hh/Y40UwSTYvwca46ZIvwdIBsaHMqiIs3UuTP+tWz5xAhzxO3/jMREtP7+IgoJirrxyIA8+eApJSVZaGmNMQ9ekyYHNQ5o2hdatXWfI1ath1So47DB/4os01SbYqvqtt5gJXAogIu1CGVTE+ep20BLof5Ul16bWlZQoq1btonv3FgDcdNNwhg9vzzHHdPQ5MmOMMeEuLg7S0yErC2bMcM1J2lmWd8iqHBNORIaIyGgRSfXW+4jIZODbqh5nAnx+E6z72C33uczXUEzkWb9+L6ec8gJDhz7Dli1uxoHo6ChLro0xxgQtJQV69YLCQpg/v2yIv+XL3fTs5uBVNZPjvcC5wA+4jo3vAb8D7geuqpvw6rmsLfDdQ265328gbYi/8ZiIoapMnLiAG274iH378klNTWTlyl20aWPdI4wxxhy8lBQ38+O6dZCUBHv3lu074gg3uY0JXlVNRM4G+qtqrog0BzYA/VR1dd2EVs/l74O3z3DL6cfBKf/1Nx4TMbZuzeLKK9/l3XeXAzB6dC+eeupMWrWy2QOMMcbUXKtWrqnIihWuM2SjRrBypZt+PTPTNSfJznaT1djwflWrKsHOU9VcAFXdJSLLLbkOUlEeTBkN27+HlG5w5mt+R2QixLRpK7j00rfZtSuX5OR4Jkw4nUsu6YeI+B2aMcaYei4lBQaWG9ysUSPXVKSwEIqL3fB+jRq5DpOdO7tjLNk+UFUJdhcRectbFqBzwDqqek5II6uvVGHaJbDhM2icBud9DI1b+x2ViRDNmzdiz548TjmlK88+e5bNxmiMMSak0tJc8pyX54bwy811I45kZMDu3a5GOyEBjjnGjaVtnKoS7HPLrU8IZSARYddyePc8yFjo1k/6DyR39jcmU+/98MNW+vdPA+Coo9L55pvLGTKkrdVaG2OMqRMxMWVD/MXGukR6/Xq33qyZG95vzhyXjMfF+RdnOKk0wVbVT+sykIiw+l2XXCc0g35XQJcz/Y7I1GN79+bx+99/xPPPL+C99y7mjDN6A
HDkkTZ+kjHGGP80b+5upQoLYe1aWLTowCYmDVUwE82YYKx+H2be5Jb7jIdj7/c3HlOvffrpasaPn8r69XuJj49my5Ysv0My9Yw3X0FHAsp5Vf3Cv4iMMZEqNdUN7Td7tlu3JDvECbaIjAQeA6KBZ1T1vgqOuQC4A1DgB1X9VShjCpl1n5QttzvavzhMvZadXcCtt05nwoS5AAwe3JbJk0fTu3dLnyMz9YmI3A9cCCwGir3NClSZYDeoMtsYU2tSUqB3b9cZ8quvICfHbc/Kgq5doXt3f+PzQ9AJtojEq2r+QRwfDfwbOBnYCMwVkamqujjgmO7AbcDRqrpbRFoFH3qYOuZ+6P5Lv6Mw9dCSJTs466xXWblyFzExUfztb8dx662/ICamyvmgjKnIaKCnldnGmLqSkgI9erj22D/+6JLrFm6CYUuwKyIiRwLPAslABxHpD/xGVa+t5qFHAitLh/YTkVdxY2svDjjmCuDfqrobQFW3H/xLCDPR1rrf1Ezbtknk5xfRr18rJk0azYABbfwOydRfq4FYIOgEm4ZaZhtjak1KCgwa5JYzMmDePNfpcflyyM93yyINY1i/YGqwHwfOBN4BUNUfROT4IB7XDjc5TamNwNByx/QAEJFZuJ8k71DVD4N47vBTlOt3BKYe+uGHrfTo0YJGjWJJTk7gk08upVOnFOLjrXuEOSQ5wAIR+ZSAJFtVr6viMQ2rzDbGhFRqKvTp49pmf/UVREW5Yf4KClzNdnIy9OzpJreJRMFcxaNUdV25IcGKKzu4BufvDowA0oEvRKSfqu4JPEhErgSuBOgQjoMsFuXBj0/7HYWpRwoLi7n33q+4664vuP76oTz44CkA9OwZ4V/pTV2Z6t1qW2SU2caYOtG+PaSnu+X8fDcb5LZtbnbIkhLYvBkGDIjMGu1gEuwNXjMR9droXQssD+Jxm4D2Aevp3rZAG4FvVbUQWCMiy3GF99zAg1T1aeBpgMGDB2sQ565b+9aVLXc61b84TL2wePEOxo59h3nzNgNQXFyCqtq41qbWqOokEYnDq3EGlnnlbFUaTpltjKkzpZe2hAR3a+n12d+0Cfbtc8l227aRl2AH03vqauAPQAdgG3CUt606c4HuItLZK+gv4sAalXdwNSGISCruYlD/pmP/4hZ3n9gKWvT2NxYTtoqLS3jooa8ZOPAp5s3bTMeOycyYMYZHHhlpybWpVSIyAliB67T4BLBcRI6t5mENp8w2xvguLc01D1mzBrZvd+20ly93bbcjQTA12EWqetHBPrGqFonINcBHuLZ6z6nqTyLyd2Ceqk719p0iIqVDSf1RVXce7Ll8U1wIs/4Cq6a49R7n+xuPCVuZmfmcccbLfPmlm/rq8ssH8PDDp9K0abzPkZkI9RBwiqouAxCRHsArwKDKHtAgymxjTNiIjnZtsffscbelS92ENdHRbhzt+t66LJgEe66ILANeA95S1cxgn1xVpwHTym37a8Cy4mrH/xDsc4aVb++Bud6EMoNuhOP+6W88Jmw1aRJHamoiaWlNeOaZUT/PymhMiMSWJtcAqrpcRGKre1DEl9nGmLDTsaMbN7uoyCXYK1fC1q0wfHj9bjpSbYKtql1FZDju58I7RWQB8Kqqvhry6MLZrmUwx5uD4ZwPoPNIf+MxYWfDhr3k5RXRvXsLRISnnx6FCLRokeh3aCbyzRORZ4AXvfVLgHk+xmOMMRWKiYGmTd1y8+ZudJHly+HLL6FzZzjyyPqZZAc1g4Wqfu0N7zQQ2Ae8FNKowlnWFpj/GLx1OhTnQ5/LLLk2+1FVJk1aQN++T3LxxW9SWOgG3UlNTbTk2tSVq3HjV1/n3RYTXN8ZY4zxVbNmMHSoa6O9ZQt8+62buGbZMnfLyHC3cG+vHcxEM01wkw1cBPQGpgDDQxxXeCrMhVeGlY0akjYETnjc35hMWNm2LYvf/vY9pkxxv863bZtEdnYhKSnRPkdmGhJvBseHvZsxxtQ7HTq4JiNr17oRR
0TcGNopKa6me/NmV/s9bFh4DvMXTBvsRcC7wAOq+mWI4wlv2+aVJddH3w39r4a4JH9jMmHjf/9bzFVXvcfOnbk0bRrP44+PZMyY/jZCiKkzIvK6ql4gIguBA4bHU9XDfQjLGGMOWlSUm2I9cJr1rVtdYp2b69ptN20KP/wAXbrUzwS7i6qWhDyScFaYC98/Dt9PcOudT4ej/uxvTCasXHHFVJ555nsATjqpC889dxbt2yf7HJVpgK737s/0NQpjjAmBtDR3A1CF7GxYsgRiY6FxY5dkh0uiXWmCLSIPqeqNwJsiUlFNyDkhjSxc5GTAK0fBnlVuvUk6HPuAvzGZsHPEEWkkJsbyz3+ezFVXDSYqymqtTd1T1S3eYgaQq6ol3hB9vYAP/IvMGGNqlwg0aQJt2rgmJDt3uqYiZ57pEm6/VVWD/Zp3P6EuAgkrJcWw/A1Y+Q4s/x+oNzP8WW9Cl1EQHQZ/OeOrffvyWbBgK8ce2xGAq68ewqhRPenQwWqtTVj4AjhGRJoBH+MmkbkQN5qIMcZEjPR0V6u9apWbtGbzZjf0n98qTbBVdY632FtV90uyvckIPg1lYL7ZOg9ePx4Ks8q2JbaGw6+E7g2j0t5U7bPP1jBu3BR27sxl4cKr6dQphagoseTahBNR1RwRuRx4QlUf8IZYNcaYiBMTA61bw44dMHs2ZGZC374+xxTEMeM5sBb78gq2RYYVb5Ul18f+00193mkkRNkoEA1dTk4ht902nccfd989Bw1qQ0FBsc9RGVMhEZFhuBrry71tVogZYyJWSoobN3v5cjczZFqav+2xq2qDfSFuaL7OIvJWwK4kYE+oA/PN4snu/rxPoONJ/sZiwsbs2RsZO/Ydli/fSUxMFH/5y7HcdtsviI21nMWEpRuA24C3venOuwCf+RyTMcaEVLt2kJgIixbB119Dr17+DeFXVQ32HGAnkA78O2B7JvB9KIPyRdZmmDIasja59XbH+BuPCRtPPjmXa675gJISpU+flkyaNJpBg9r6HZYxlVLVmcDMgPXVuAlnjDEmojVr5mqyN250t5494cQT6z6OqtpgrwHWANPrLhwfzbkfts51y+nHQXScv/GYsDF8eHvi4qK57rojufPO40lICKZllTF1T0QeVdUbRORdKh4H+ywfwjLGmDqVnu5qs3/6yU1O44eqmojMVNXjRGQ3+xfUAqiqNg95dHWhuBC+ewQWPu3WT34K+l3hxn8xDVJRUQlTpizl3HMPA6B//zRWr76ONm1sUiET9l7w7h/0NQpjjPFZaRqXm+vaZScluXbZdZXeVVUVd7x3HyZDdofIxpnw5S1uOT4FOp9hyXUDtnRpBmPGvM3cuZt59dVzufBC1w3ZkmtTH6jqd97iPLxxsAFEJBqI9y0wY4zxQUoKZGTArFlubOzjjoP27evm3FGV7QiYvbE9EK2qxcAw4LdA4zqIrW4U5pQt/3oeJLXzLxbjm5IS5ZFHvmHAgKeYO3cz6elNadkycj7mpsH5FEgMWG9EQ2nuZ4wxnvR0OOww1/Fx+3ZYuxZK6mhu8koT7ADvACoiXYHnge7AyyGNyg9dRkFKV7+jMD5YvXo3xx8/iT/84WPy8ooYN+4IFi26mhNO6Ox3aMbUVIKq/jyYv7ecWMXxxhgTkeLi3CgiCQmwbBns21c35w2mt1aJqhaKyDnAv1T1cRGJvFFETIP01VfrGTnyRbKzC2ndujH//e8oRo3q6XdYxhyqbBEZqKrzAURkEJDrc0zGGOOLlBTX/nrjRleTnZIS+nMGk2AXicj5wKXAaG+bzRVuIsKAAWm0aZPEwIFteOKJ02nRwir5TES4AXhDRDbjOqan4aZKN8aYBik52U2jPn266/DYpk1ozxfsTI6/Ax5Q1dUi0hl4JbRhGRMaqsobbyzmtNO6kZQUT+PGccyefbkl1iaiqOpcEekFlP4cs0xVC/2MyRhj/JSSAr17w4IFMG0anH12aCegqbYNtqouwk1QMM8rs
Deo6j2hC8mY0Ni+PZvzznuDCy/8H3/84yc/b7fk2kQaEUkEbgGu98rwTiJyps9hGWOMr1JSoG9fN5X63LmhPVe1NdgicgxubNVNeD81isilqjortKHVkRVv+h2BqQNvvbWEq656jx07ckhKimPoUBstxkS054HvcCM/gSu/3wDe8y0iY4wJA6mprj32jh2hPU8wTUQeAU5X1cUAItIbl3APDmVgdSJnOyye7Jajov2NxYTE7t25XHfdh7z44o8AnHBCZ5577iw6dqyDHg7G+Kerql4oIhcDqGqOiA3wb4wx4MbEDrVgEuy40uQaQFWXiEj9nkdcFaZdsn/t9ciJvoVjQiMjI4f+/f/D5s2ZNGoUwwMPnMzvfjeEqCjLM0zEKxCRRniz8HrDrOb7G5IxxoSPvDw3w2Pz5qFpix1Mgj1fRP4DvOitXwLU72H68nbBUq+fZufT4MQnID7Z35hMrUtNTeTEEzuzcuUuJk0aTffuLfwOyZi68jfgQ6C9iLwEHA1c5mtExhgTJpKTXTvsGTOgRQs4//zaP0cwCfZVuE6ON3vrXwL/qv1QfHLONL8jMLVo5sy1NG0az4ABbvydJ588g4SEGKKjg5lTyZj6z2sKshQ4BzgK13fmelXN8DUwY4wJE6mp0KQJLF0KUVGhqcmuMsEWkX5AV+BtVX2g9k4bJhKa+x2BqSW5uYX8+c8zePTR2fTu3ZLvvruShIQYGjeu362ZjDlYqqoiMk1V+wHv+x2PMcaEo4QEV3u9dy988w3k50PXrm5q9doYI7vSaj0R+RNumvRLgE9EZPyhn86Y2jdnziYGDHiKRx6ZTVSUcO65va2dtWno5ovIEL+DMMaYcJaeDp06uSYjxcWwcCF8/DEU1sKsAVXVYF8CHK6q2SLSEpgGPHfopzSmdhQUFPP3v8/kvvu+orhY6d07lUmTRjNkiA3BZxq8ocCvRWQtkI1rJqKqerivURljTBgRcU1FmjSB9u1h3TrYtAnmz3e12YfSZKSqBDtfVbMBVHWHiEROI9YSm9CsvlNVTjnlBWbOXIcI3HTTMO666wQSEoLpVmBMxDvV7wCMMaa+Ke38OH8+bN8Oo0bV/Lmqyka6iMhb3rIAXQPWUdVzan5aH6nCx1e45eQu/sZiakxEuOyyI9iwYR8TJ57NMcd09DskY3wnIgm4jundgIXAs6pa5G9UxhhTP6SkuNuCBbBr16E9V1UJ9rnl1icc2qnCxK5lsPo9iEuC017wOxpzEJYv38mCBVu54II+AIwd258LLuhDYmIdjBhvTP0wCSjEjfZ0GnAYcL2vERljTD3TtCns3g0ZhzD2UqUJtqp+WvOnDUOZG+Hj38Du5W69WQ9o0cvfmExQSkqUCRPmcOut01GFww9vTa9eqYiIJdfG7O8wb/QQRORZYI7P8RhjTL3TtCls3uw6PdZUw2mwuuYDWPuRW26SDkff5W88Jihr1+5h3LgpfP75WgDGjOlPWloTf4MyJnz93MFEVYtsdnRjjDl4qamuqcjOnTV/jpB2XBSRkSKyTERWisitVRx3roioiAwOWTC53rvUZhhcvsLN4GjClqryzDPz6dfvST7/fC2tWjXm7bcvZNKk0aSkJPgdnjHhqr+I7PNumcDhpcsisq+6B4dVmW2MMT6KPcQfyIOuwRaReFXNP4jjo4F/AycDG4G5IjJVVReXOy4J10bw22Cfu0Y2fObuB1wDMZaghbvbbvuU+++fBcC55/bmySfPoGXLxj5HZUx4U9Xomj427MpsY4ypx6qtwRaRI0VkIbDCW+8vIsFMlX4ksFJVV6tqAfAqcHYFx90F3A/kBR/2QSougE1fuuUOJ4TsNKb2jB8/gLZtk3jppXN4443zLbk2JvTCp8w2xpgwkJNT88cG00TkceBMYCeAqv4AHB/E49oBGwLWN3rbfiYiA4H2qlrldL4icqWIzBOReTt27Aji1OVsmQ1FuZDaFxqnHfzjTcjt2JHNAw/MQlUB6NGjBatXX
8evftUPa0dqTJ0InzLbGGN8lpzsRhKB+LiaPD6YBDtKVdeV21Zck5MF8iaueRi4sbpjVfVpVR2sqoNbtmx58Cfb5Joa0D6Y7wWmrk2ZspS+fZ/kllumM2nSDz9vj49vOH1wjQl3dVpmG2OMz9LSXEdHiK5Rf8VgMpgNInIkoF4bvWuB5UE8bhPQPmA93dtWKgnoC3zu1VCmAVNF5CxVnRdM8EEr8ur4G1lBH0727Mnj+us/ZPJkl1SPGNGJESM6+RuUMQ1X+JTZxhgTBg7lB/RgEuyrcc1EOgDbgOneturMBbqLSGdcIX0R8KvSnaq6F/h5lncR+Ry4qdYL6sIcWF3lr5nGBx9/vIrx46ewaVMmCQkx3HffiVx77VCioqw5iDE+CY8y2xhjIkC1CbaqbscVtAfFG4P1GuAjIBp4TlV/EpG/A/NUdepBR3uwCrPhvYth+/duPcomJQkH77yzlF/+8jUAhg5tx6RJo+nZM7WaRxljQiksymxjjIkQ1SbYIvJfQMtvV9Urq3usqk4DppXb9tdKjh1R3fMdlJVTYUpAB/guZ0LvX0QznHgAACAASURBVFV+vKkzp53WjYED23Deeb354x+PJiYmpMOxG2OC5GuZbYwxESSYJiLTA5YTgF+yf0/z8PTp/5UtH3W7zdzoo7y8Iu677yuuu24ozZs3Ij4+hm+//Y0l1sYYY4yJSME0EXktcF1EXgC+CllEtSUuyd2PngpdR/kbSwM2b95mxox5myVLMli1ajcvvPBLAEuujTHGGBOxajIOWmegdW0HUmvy98HiFyA3w62ndPU3ngaqoKCYe+75gnvu+ZLiYqVnzxZcc80Qv8MyxhhjjAm5YNpg76asDXYUsAu4NZRBHZKFz8DMgGFaY20GwLq2aNF2xox5m++/3wrADTcM5R//OJFGjayTqTHGGGMiX5UJtrjBTvtTNhZqiZZOtReucra7+7ZHw+FXQNOO/sbTwGzYsJfBg58mP7+YTp1SeP75s21sa2OMMcY0KFUm2KqqIjJNVfvWVUCHREtg2atuudfF0Gesv/E0QO3bJzN+/ACKi0t48MFTSEqK9zskY4wxxpg6FUwb7AUiMkBVvw95NIeqIAv2ebO69x3nbywNREmJ8sQTcxkwII2jj+4AwIQJp9uEMcYYY4xpsCpNsEUkRlWLgAHAXBFZBWQDgqvcHlhHMQYvY5G7j0+G2ER/Y2kA1q3bw/jxU5kxYw1duzbjp59+R3x8jCXXxhhjjGnQqqrBngMMBM6qo1gO3ZIX3X2nkf7GEeFUlYkTF3D99R+SmVlAy5aJPPDAycTH12RQGmOMMcaYyFJVRiQAqrqqjmI5dMUF7r7Dif7GEcG2bMnkyivf4733lgMwenQvnnrqTFq1stFajDHGGGOg6gS7pYj8obKdqvpwCOKpJdZEIRSKi0s4/vhJLFu2k+TkeCZMOJ1LLumHG2zGGGOMMcZA1Ql2NNCE+pKtfnsvLHrW7ygiWnR0FHfffQLPPDP//9u787iqqq6B478lDoCKc0pqao+G4Gw4kOY8kBVqzmUO2ZNpqWlW+qpvObxNPmWZlpk5ZUU5m0+Z85QjKRpiDhklqIlmOADKsN8/zvUKgop6L5cL6/v53A/3zGvfy94s9tnnHGbNCqFCBR9Xh6SUUkoplePcLME+aYyZkG2R3I3UFNjzwbVp34auiyWX+e67Q0RF/cOQIY0A6No1gC5d/LXXWimllFLqBm45BtstnN5rPWCmaEX4dxRIPldH5Pbi4hIZPvxH5swJJ3/+fLRpcz/+/mUANLlWSimllLqJmyXY7nOl4Int1s976mly7QDr1h2jf//lHD9+nkKFPHjrrdb4+ZV2dVhKKaWUUm7hhgm2Mebv7Azkrvz6lfWzei/XxuHmLl26wmuvrWX69N0ABAbey/z5new910oppZRS6tZyx42Lr1ywfpZ2jye651RDhvxgHxLy+uvNGTWqKfnz6xkBpZRSSqnbkTsSbOUQr7/en
MOHz/LRR49Qr56vq8NRSimllHJLuaN7Mjne+qnjr2/Lnj0nGTRoJampBoBKlYqzdeszmlwrpZRSSt0F9+/B/uVziPsdCvpA8aqujsYtJCWl8OabW5g0aQvJyak0aFCeZ56p5+qwlFJKKaVyBfdOsONjYc1A6/39j4JHQdfG4wYiI2Pp02cpP/98EoBhwxrRs6eOXVdKKaWUchT3TrB//RpMCuT3hub/cXU0OVpKSipTpuxg7Nj1XL6cQqVKxZgzpyMtW1ZxdWhKKaWUUrmK+ybY8afhp3HW+4fGQ5F7XRtPDjdv3j5eeWUNAM8+W4/33muPj08hF0ellFJKKZX7uG+CvetduHLeeh/Q27WxuIE+feqwYsUhnnvuQTp0qObqcJRSSimlci33ve1GQqz184FuULica2PJgY4fj6Nnz0X89ddFAPLnz8eyZT01uVZKKaWUcjL37ME+fxwi51vvA192bSw5jDGG+fP3MXToKs6fv0yhQvmZN6+Tq8NSSimllMoz3DPB3mRLqiUflGvg2lhykL/+usjAgStZvvwQACEhfrz7bhsXR6WUUkoplbe4Z4KddMn62eA1fbiMzaJFkTz//ErOnk3Ax6cQU6cG06dPHUTE1aEppZRSSuUp7plgX1W+iasjyBEOHTpD9+4LMQbatLmf2bNDqFixmKvDUkoppZTKk9w7wVYA+PmVZty4ZpQtW4Tnnw8kXz7ttVZKKaWUchVNsN3Q+fOXefnlH+naNYD27a3Hw48f39LFUSmllFJKKdAE2+1s2PA7/fsv548/4tiwIYpDh17Ew0PHoSullFJK5RROzcxEJFhEDonIUREZlcnyESISKSL7RWSdiFRyZjzuLD4+iWHDfqBVq/n88UccDz7oy4oVvTS5Vko5jLbZSinlGE7LzkTEA5gOPAIEAL1EJOC61fYCgcaY2sAi4F1nxePOduyIpl69T5k6dRf58+dj/PgWbN8+gICAMq4OTSmVS2ibrZRSjuPM7s+GwFFjzDFjzBUgFOiYdgVjzAZjTLxtcgdQIUt7vnDckXHmaFeupNC9+0IOHz5LjRpl2LnzWf73f5tToICHq0NTSuUuzmuzlVIqj3HmGOzyQNpMOBpodJP1BwA/ZLZARJ4DngO4/z5fOHPSWlC6liPizNEKFvTg008fY+PGKCZMaEmhQjpsXinlFE5ps++77z5HxaeUUm4jR2RrItIbCASaZ7bcGDMTmAkQGFDRAFD+YfDJfQ13cnIq77yzlcTEZCZObAXAI49U45FHqrk4MqWUstxWmx0YaLIxNKWUyhGcmWDHABXTTFewzUtHRNoAY4DmxpjLt9xr4jnrZ5VgR8SYoxw8GEvfvsvYvfsEHh7CgAH1qVy5uKvDUkrlDc5ps5VSKg9y5hjs3UA1EakiIgWBnsCKtCuISD3gUyDEGHM6S3u9+pj0Kh0cGatLpaYa3n9/O/Xqfcru3SeoWNGHVat6a3KtlMpOzmmzlVIqD3JaD7YxJllEXgR+BDyA2caYAyIyAQgzxqwAJgNFgIUiAvCnMSYkSwco7OucwLPZsWPn6N9/OZs3/wFA//51mTKlPcWKebo4MqVUXuL0NlsppfIQp47BNsZ8D3x/3bz/TfO+zR3tuPzDULjs3QWXQ4wevY7Nm/+gbNnCfPbZ4zz+uJ+rQ1JK5VFOa7OVUiqPyREXOd62YlVcHcFdMcZg6/3hgw/aU7RoQd55pw2lSnm7ODKllFJKKXW39DGA2cgYwxdf7KNDh69ITk4FwNe3KLNmhWhyrZRSSimVS2iCnU1On75Ely7f0qfPMlatOsrixZGuDkkppZRSSjmBew4RcTNLlhxk4MCVnDkTj49PIT78MJju3Wu4OiyllFJKKeUEmmA70blzCQwduooFC/YD0KpVFebM6ch99xVzcWRKKaWUUspZNMF2otDQCBYs2I+XV34mT27LoEENyJdPXB2WUkoppZRyIk2wHSztHUIGDgzk8OGzDB7cgGrVSrk4MqWUU
koplR30IkcH2rQpivr1Z/Lnn3EA5MsnTJkSrMm1UkoppVQeogm2AyQkJDF8+CpatpxHePgpJk/+ydUhKaWUUkopF9EhIndp164Y+vRZyqFDZ/HwEMaMeZgxY5q5OiyllFJKKeUimmDfoStXUpgwYRNvvbWV1FSDv39p5s/vTGDgva4OTSmllFJKuZAOEblDBw/G8vbbWzHGMHJkEHv2DNTkWimllFJKaQ/27UhNNfbb7NWpU46pUx+hVq17ePjhSi6OTCmllFJK5RSaYGfRoUNn6Nt3Ga+91oTOnf0BGDy4gYujynuSkpKIjo4mMTHR1aEodUOenp5UqFCBAgUKuDqUHCqJfPmiEUlE9NEAygWMAWM8SU2tAGg9VY6nCfYtpKYaPvpoJ6NGrSMxMZlJk7bQqVN1+72uVfaKjo6maNGiVK5cWb8DlSMZYzh79izR0dFUqVLF1eHkSPnyRXPPPUUpVkzrsXINYwxxcWc5fTqa1FStp8rxdAz2TURF/UPr1vN56aUfSUxMpm/fOqxb10f/ILhQYmIipUqV0u9A5VgiQqlSpfQsy02IJFKsmNZj5ToiYvsd1HqqnEN7sDNhjOHzz/cyfPiPXLx4hXvuKczMmY/RsWN1V4emQP8oqxxPf0dvTkQ/I+V6IqJDlJTTuGeC7VHQqbuPj0/izTe3cPHiFbp2DeCTTx6ldGlvpx5TKaWUUkrlDu45RKR6L4fv0hhDcnIqAIULF2TevE589dUTfPttV02uVToeHh7UrVuXmjVr8vjjj/PPP//Ylx04cIBWrVrh5+dHtWrVmDhxIsYY+/IffviBwMBAAgICqFevHi+//LIrinBTe/fuZcCAAa4O46beeustqlatip+fHz/++GOm66xbt4769etTt25dmjZtytGjR9MtX7x4MSJCWFiYfd7+/fsJCgqiRo0a1KpVK8Mwj5CQEGrWrGmfHjlyJOvXr3dgyVR28fHxICioLg0a1KR3727Ex8dnmN+tW/r6nVMYY+jQoRXnz593dSg3tHfvzzRsWIvatasycuTQdO3gVR98MJmgoLr2z9vHx4O///4bgGnTphAYWIMGDWrSr18ve11s2/Zh+zZVq95Lz56dAFi5cjmNGtUmKKguDz8cyLZtWwGIjY2lU6fgbCq1Ute4Z4Jdpo5Ddxcbe4nu3Rfx0kur7PMefrgSvXrV0tOYKgMvLy/Cw8OJiIigZMmSTJ8+HYCEhARCQkIYNWoUhw4dYt++fWzbto2PP/4YgIiICF588UUWLFhAZGQkYWFhVK1a1aGxJScn3/U+3nzzTYYOHZqtx7wdkZGRhIaGcuDAAVatWsXgwYNJSUnJsN6gQYP48ssvCQ8P58knn2TSpEn2ZRcuXODDDz+kUaNG9nnJycn07t2bGTNmcODAATZu3JjuLiBLliyhSJEi6Y4xZMgQ3n77bSeUUjmbl5cX27eHs3t3BAULFuTzz2dkmF+iRElmzpzu0OM6or78+OP31KpVBx8fnyxvk1kdcaaXXhrEtGmfsW/fEX777Qhr1qzKZJ1X2L49nO3bwxk//i2aNm1OyZIlOXEihk8+mcqWLWHs3h1BSkoKixaFArBmzRb7Ng0bBhES8gQALVq0ZseOfWzfHs4nn8zmhReeBaBMmTKUK+fL9u0/ZV/hlcJdh4g40LJlvzJw4EpOn75E0aIFGTeuGWXLFrn1hsr13nPSPz8vZ+xpuZGgoCD2798PwFdffUWTJk1o164dAN7e3kybNo0WLVrwwgsv8O677zJmzBiqV7fG8nt4eDBo0KAM+7x48SJDhgwhLCwMEeH111+nS5cuFClShIsXLwKwaNEiVq5cydy5c+nXrx+enp7s3buXJk2asGTJEsLDwylevDgA1apVY+vWreTLl4/nn3+eP//8E4APPviAJk2apDv2hQsX2L9/P3XqWP/E7tq1i2HDhpGYmIiXlxdz5szBz8+PuXPnsmTJEi5evEhKSgqbNm1i8uTJfPvtt1y+fJnOn
Tszfvx4ADp16sTx48dJTExk2LBhPPfcc1n+fDOzfPlyevbsSaFChahSpQpVq1Zl165dBAUFpVtPROw9fHFxcdx777UHQY0bN47XXnuNyZMn2+etXr2a2rVr28teqlSpdN/J+++/z8yZM+nevbt9fqVKlTh79iynTp2iXLlyd1WuvCo6GhISHLtPLy+oUCHr6z/00MNEROzPML9hw6BM5wN89dV8PvzwP4gINWvWZtasLxg4sB/BwY/RuXNXAMqWLcJff11k8+aNTJw4jhIlSnD48K+EhDxB+fIVGTjwBQD+7//eoEiRIgwbNpIPPpjMkiVWPXr88c6MHTs+w7G/+eZL+ve/Vo969uxEdLRVxwYPHsYzzzxnP/4zzwxkw4a1vP/+dLy8vBg1agSXLl2kVKnSfPrpXMqV82XOnM+YM2cmV65c4f77qzJr1hd4e9/5mdtTp05y/vx5GjZsDECvXn347rtltGv3yA23Wbjwa7p1u3Z2Ojk5mYSEBAoUKEBCQjy+vukf5Hb+/Hk2b17PjBlzANL983vp0qV0nWOPPdaJb775kqCg9O2dUs6UZxPsf/5JZNiwVcyfvw+AFi0qM2dOR02uVZalpKSwbt06+3CKAwcO8OCDD6Zb51//+hcXL17k/PnzREREZGlIyMSJEylWrBi//PILAOfOnbvlNtHR0Wzbtg0PDw9SUlJYunQp/fv3Z+fOnVSqVImyZcvy5JNPMnz4cJo2bcqff/5J+/btOXjwYLr9hIWFpRsCUb16dbZs2UL+/PlZu3Yt//M//8PixYsB2LNnD/v376dkyZKsXr2aI0eOsGvXLowxhISEsHnzZpo1a8bs2bMpWbIkCQkJNGjQgC5duqRLXgGGDx/Ohg0bMpSrZ8+ejBo1Kt28mJgYGjdubJ+uUKECMTExGbadNWsWHTp0wMvLCx8fH3bs2GGP+/jx4zz66KPpEuzDhw8jIrRv357Y2Fh69uzJq6++ClgJ+csvv5xp0lG/fn1++uknunTpkvmXo3K05ORk1qz5gTZt0g8jSElJYePGdfTtm3G4VGTkAd59dxJr126jdOnS9mENN7Nv3x527YqgcuUq7Nu3l1dffcmeYC9Z8i3Ll//IunWrOXr0CJs2WfWoe/cQtm7dTNOmzdLta8eOn5g69VP79McfX6tjzZo1oGNHq45dunSJwMBGvPXWeyQlJREc3JzQ0OWUKVOGRYu+Yfz4MXzyyWxCQp6gf/9/AzB+/FjmzfucQYOGpDvmpk0bGDVqeIZyeXt7s27dtnTzTpyIoXz5a//hlC9fgZMnM9bRq+Lj41m7dhXvvTcNgHvvLc/QoSPx978PT08vWrduR+vW7dJts3LlMpo3b52uF3/FiqW8/vpozpw5zaJF/7XPr18/kAkTxt7w+Eo5Q55MsFev/o1nnllOTMwFPD3z8847bXjxxYb2pzQqN3EbPc2OlJCQQN26dYmJicHf35+2bds6dP9r164lNDTUPl2iRIlbbtOtWzc8PDwA6NGjBxMmTKB///6EhobSo0cP+34jIyPt25w/f56LFy+m6/k5efIkZcqUsU/HxcXRt29fjhw5goiQlJRkX9a2bVtKliwJWL2/q1evpl69eoDV43vkyBGaNWvG1KlTWbp0KQDHjx/nyJEjGRLsKVOmZO3DuQ1Tpkzh+++/p1GjRkyePJkRI0Ywc+ZMRowYwdy5czOsn5yczNatW9m9ezfe3t60bt2aBx98kFKlSvHbb78xZcoUoqKiMmx3zz33cOLECYfHn1fcTk+zIyUkJBAUVBewerCvJtJX5584EYOfnz+tWmWs35s2radTp26ULl0awF4PbubBBxtSubJ1v+U6deoRG3uakydPcOZMLCVKlKBChYp8/PGHrF+/mocesurRpUsX+e23IxkS7HPn/qZo0aL26U8+mcp331l1LCbmOL/9ZtUxDw8POnWy/vE7fPgQkZERhIRY5UlJSaFcOV8AIiMjmDBhLHFx/3Dp0kVat26fI
f7mzVuyfXv4Lct5J77//jsaN25i/xzPnTvHf/+7nIiI3ylevDhPP92N0NAF9OzZ277NwoVf07fvs+n2ExLSmZCQzmzdupmJE8excuVaAMqUuYdTp7SOquyVJxPsOXPCiYm5QOPGFZg7tyN+fqVdHZJyI1fHYMfHx9O+fXumT5/O0KFDCQgIYPPmzenWPXbsGEWKFMHHx4caNWrw888/24cg3K60pzyvv/iucOHC9vdBQUEcPXqU2NhYli1bxtixVs9NamoqO3bswNPT86ZlS7vvcePG0bJlS5YuXUpUVBQtWrTI9JjGGEaPHs3AgQPT7W/jxo2sXbuW7du34+3tTYsWLTK9P/Tt9GCXL1+e48eP26ejo6MpX758unViY2PZt2+ffYx1jx49CA4O5sKFC0RERNjLcerUKUJCQlixYgUVKlSgWbNm9qSpQ4cO7NmzhyJFihAWFkblypVJTk7m9OnTtGjRgo0bNwLYh88o93J1rPWN5sfHx9OxY3s+/XQ6gwdn7ZqE/Pnzk5pqXSyfmprKlStX7MvS1heAzp27sXTpIk6fPsUTT1j/BBtjePnl0QwYkL4e3eg4+fLlY/PmjWzcuJb16606Fhx8rY55enra//E2xuDvX4P167dn2N/zz/cjNHQZtWrVYcGCuWzZsjHDOrfTg33vveWJiYm2T8fEROPrW/76Te0WLQpNNzxkw4a1VK5cxf7PfkjIE+zYsc2eYJ85c4aff97F118vzXR/TZs2IyrqGGfOnKF06dIkJibi6al1VGUv97zI8Q5cvnztwpJp0x7h/ffbsWVLf02u1R3z9vZm6tSpvPfeeyQnJ/PUU0+xdetW1q61ek0SEhIYOnSofZjBK6+8wptvvsnhw4cB6w/wjBkzMuy3bdu29gsn4doQkbJly3Lw4EFSU1PtPcKZERE6d+7MiBEj8Pf3t/cWt2vXjo8++si+Xnh4xuTC398/3d024uLi7MlrZr2+V7Vv357Zs2fbx4jHxMRw+vRp4uLiKFGiBN7e3vz666/2YRrXmzJlCuHh4Rle1yfXYN3JIzQ0lMuXL/P7779z5MgRGjZsmG6dEiVKEBcXZ/+s16xZg7+/P8WKFePMmTNERUURFRVF48aNWbFiBYGBgbRv355ffvmF+Ph4kpOT2bRpEwEBAQwaNIgTJ04QFRXF1q1beeCBB+zJNVhDS9IOq1G5g7e3N//5z1Q++ui9DBcmNm/eimXLFnL27FkA+xCR++6rTHj4zwD8978r0p3xuV6XLj1YvDiUZcsW8cQT3QBo06Y9X3xxrR6dOGHVo+tVq+bH778fA+D8+TiKF7fq2KFDv7J7d+Z17IEH/DhzJpadO60EOykpicjIA4B17UXZsr4kJSXxzTdfZrr91R7s61/XJ9cA5cr54uPjw65dOzDG8PXX83nssY6Z7jcuLo6fftrEo49eW16x4n3s2rWD+Ph4jDFs3LgOPz9/+/JlyxYRHPxYus6C3347ar9TSXj4Hi5fvmxv+44ePUxAgNZRlb1yfYKdmJjMyJGrCQr6nCtXrKuoS5XyZvjwIPLnz/XFV05Wr149ateuzddff42XlxfLly9n0qRJ+Pn5UatWLRo0aMCLL74IQO3atfnggw/o1asX/v7+1KxZk2PHjmXY59ixYzl37hw1a9akTp069p7dt99+m8cee4yHHnoIX1/fm8bVo0cPFixYYB8eAjB16lTCwsKoXbs2AQEBmSb31atXJy4ujgsXLgDw6quvMnr0aOrVq3fTux+0a9eOJ598kqCgIGrVqkXXrl25cOECwcHBJCcn4+/vz6hRo9KNnb5TNWrUoHv37gQEBBAcHMz06dPtvXQdOnTgxIkT5M+fn88++4wuXbpQp04dvvjii3TjrTNTokQJRowYQYMGDahbty7169fn0Ucfvek2SUlJHD16lMDAwLsul8p56tSpR82atVm48Ot08wMCavDKK2MIDm5O48Z1GD16BAD9+v2brVs30bhxHXbt2p6h1
/r6fVy4cAFf3/L2oRqtW7ejW7cnadUqiIYNa9G7d1cuXryQYdv27R+19zK3bWvVsfr1/Xn99VE0aJB5HStYsCALFixi3LjXaNy4DkFBddm500qOx42bSMuWjWjTpgkPPOCYB6pNmfIxL7zwLLVrV6VKlX/ZL3CcNWsGs2Zda3u++24prVq1S/dZNWjQiE6dutKkSX0aNqxFamqq/cJNyNjjDbB8+WIaNKhJUFBdRox4gXnzvrGf9du8eQPt29+8LivlaJLZvSlzssCKYsIOnwGvUrdcNyzsBH36LOXgwTN4eAirVvWmTZv7syFK5SwHDx7E39//1iuqOzZlyhSKFi3Ks88+e+uV87ilS5eyZ88eJk6cmGFZZr+rIvKzMSZPZeOBgYHm6r3GL1+GDRvA1/cgVatqPb5Tp06d5N//7sN3361xdShuoV27ZnzzzfJMr2c5evQgycn6u6gyd+AA9O1b+IAxl277FEiu7MK9ciWF11/fQOPGszh48AzVq5dm27YBmlwrlQWDBg2iUKFCrg7DLSQnJ+fIhwWp3K1cOV/69ft3jn7QTE4RGxvLkCEjsnSxuFKOlOsucoyIOE2fPkvZu/cUIjBiRGMmTWqFl1eBW2+slMLT05Onn37a1WG4hW7durk6BJVHdenS/dYrKcqUKcPjj3dydRgqD8p1CfaOHdHs3XuKKlWKM2dOR5o3r+zqkJSDGWP0CZsqR3O3oXfZzRitx8r1jDFoVVXOkisS7EuXrlC4cEEABgyoR0JCEv361aVoUT3Nndt4enpy9uxZSpUqpX+cVY5kjOHs2bM3vR1iXmeMJ3FxZylWTOuxcg1jDHFxZzFG66lyDrdOsFNTDdOn72LChM1s2/YM1apZjfWQIY1cHZpykgoVKhAdHU1sbKyrQ1Hqhjw9PangqieouIHU1AqcPm3VY82vlStYZ1E8SU3Veqqcw20T7D/++If+/ZezYUMUAIsXH2TUqKauDUo5XYECBahSpYqrw1BK3ZUCpKZqPVZK5V5OvYuIiASLyCEROSoiGZ4YISKFROQb2/KdIlI5K/v9fG4ktWp9woYNUZQp483ixd01uVZKqbvkrDZbKaXyGqcl2CLiAUwHHgECgF4iEnDdagOAc8aYqsAU4J1b7ffoP+V5dvB6Lly4QufO1YmIGMwTT+g9LJVS6m44q81WSqm8yJlDRBoCR40xxwBEJBToCESmWacj8Ibt/SJgmoiIuckl+OcvGYoVK8S0aR146qlaeoGMUko5hlPa7OsZA4mJjglYKaWcKSnpzrd1ZoJdHjieZjoauP7qQ/s6xphkEYkDSgFn0q4kIs8BV5+TejkubnTE00+PJg/dqrc0130meYCWOW/Ia2X2c3UAN+G0NltEIq4tLexFrr+0MakEFDjn6iiyl5Y5b8iLZU6ofCdbucVFjsaYmcBMABEJy2uPGtYy5w1a5txPRMJcHUN20DZbwoxJ1DLnclrmvOFO221nXuQYA1RMM13BNi/TdUQkeYesrQAACYBJREFUP1AMOOvEmJRSSmVO22yllHIQZybYu4FqIlJFRAoCPYEV162zAuhre98VWH87Y/mUUko5jLbZSinlIE4bImIbn/ci8CPgAcw2xhwQkQlAmDFmBfA58IWIHAX+xmrQb2Wms2LOwbTMeYOWOffLseXVNtuhtMx5g5Y5b7ijMot2PiillFJKKeU4Tn3QjFJKKaWUUnmNJthKKaWUUko5UI5NsPPiI3uzUOYRIhIpIvtFZJ2IVHJFnI50qzKnWa+LiBgRcevbA2WlvCLS3fY9HxCRr7I7RkfLwu/1fSKyQUT22n63O7giTkcSkdkicjr9/Z/TLRcRmWr7TPaLSP3sjtHRtM3WNvu69XJFmw3abueFdtspbbYxJse9sC6w+Q24HygI7AMCrltnMDDD9r4n8I2r486GMrcEvG3vB+WFMtvWKwpsBnYAga6O28nfcTVgL1DCNn2Pq+POhjLPBAbZ3gcAUa6O2wHlbgbUB
yJusLwD8AMgQGNgp6tjzobvWdvsPFBm23q5os2+je9Z2203b7ed0Wbn1B5s+yN7jTFXgKuP7E2rIzDP9n4R0FrErZ8OdssyG2M2GGPibZM7sO5T686y8j0DTATeAdz9ActZKe+/genGmHMAxpjT2Ryjo2WlzAbwsb0vBpzIxvicwhizGesuGzfSEZhvLDuA4iLimz3ROYW22dpmp5Vb2mzQdjtPtNvOaLNzaoKd2SN7y99oHWNMMnD1kb3uKitlTmsA1n9T7uyWZbadhqlojPlvdgbmJFn5jh8AHhCRn0Rkh4gEZ1t0zpGVMr8B9BaRaOB7YEj2hOZSt1vfczpts7XNBnJdmw3aboO223AHbbZbPCpdpScivYFAoLmrY3EmEckHvA/0c3Eo2Sk/1unGFli9XZtFpJYx5h+XRuVcvYC5xpj3RCQI6z7LNY0xqa4OTClH0DY719N2W9vtDHJqD3ZefGRvVsqMiLQBxgAhxpjL2RSbs9yqzEWBmsBGEYnCGve0wo0vmsnKdxwNrDDGJBljfgcOYzXc7iorZR4AfAtgjNkOeAKlsyU618lSfXcj2mZrmw25r80GbbdB2224gzY7pybYefGRvbcss4jUAz7FaqjdfYwX3KLMxpg4Y0xpY0xlY0xlrDGMIcaYMNeEe9ey8nu9DKsXBBEpjXXq8Vh2BulgWSnzn0BrABHxx2qoY7M1yuy3AuhjuzK9MRBnjDnp6qDugrbZ2mbnxjYbtN3Wdtty+222q6/cvNEL64rNw1hXso6xzZuAVVnB+jIXAkeBXcD9ro45G8q8FvgLCLe9Vrg6ZmeX+bp1N+L+V6Tf6jsWrFOskcAvQE9Xx5wNZQ4AfsK6Uj0caOfqmB1Q5q+Bk0ASVu/WAOB54Pk03/N022fyi7v/Xmfxe9Y2W9tst3xpu537221ntNn6qHSllFJKKaUcKKcOEVFKKaWUUsotaYKtlFJKKaWUA2mCrZRSSimllANpgq2UUkoppZQDaYKtlFJKKaWUA2mCrZxCRFJEJDzNq/JN1q0sIhEOOOZGETkkIvtsj6z1u4N9PC8ifWzv+4nIvWmWzRKRAAfHuVtE6mZhm5dExPtuj62UUjlRmr8ZESLynYgUd/D++4nINNv7N0RkpCP3r9T1NMFWzpJgjKmb5hWVTcd9yhhTB5gHTL7djY0xM4wx822T/YB70yx71hgT6ZAor8X5MVmL8yVAE2ylVG519W9GTeBv4AVXB6TU3dAEW2UbW0/1FhHZY3s9lMk6NURkl60nY7+IVLPN751m/qci4nGLw20Gqtq2bS0ie0XkFxGZLSKFbPPfFpFI23H+Y5v3hoiMFJGuQCDwpe2YXrae50BbL7c9Kb6uZ+R249wOlE+zr09EJExEDojIeNu8oViJ/gYR2WCb105Etts+x4UiUuQWx1FKKXdxfbv4iu1s3/6r7aJtfh/bvH0i8oVt3uMistPW5q8VkbIuiF8pTbCV03ilGR6y1DbvNNDWGFMf6AFMzWS754EPjTF1sRLcaNtjWHsATWzzU4CnbnH8x4FfRMQTmAv0MMbUAvIDg0SkFNAZqGGMqQ1MSruxMWYREIbV01zXGJOQZvFi27ZX9QBC7zDOYKzH7F41xhgTCNQGmotIbWPMVOAE0NIY09L2KN6xQBvbZxkGjLjFcZRSKsezdUq0xvZobhFpB1QDGgJ1gQdFpJmI1MBqB1vZzgYOs+1iK9DYGFMPCAVezeYiKAVYyYZSzpBgSzLTKgBMs405TgEeyGS77cAYEakALDHGHBGR1sCDwG4RAfDCStYz86WIJABRwBDAD/jdGHPYtnwe1qnHaUAi8LmIrARWZrVgxphYETkmIo2BI0B1rEfGvnCbcRYEimD90biqu4g8h1U3fbEeR7v/um0b2+b/ZDtOQazPTSml3JWXiIRj9VwfBNbY5rezvfbapotgJdx1gIXGmDMAxpi/bcsrAN+IiC9W2/h79oSvVHqaYKvsNBz4C6thzIeV4KZjj
PlKRHYCjwLfi8hAQIB5xpjRWTjGU8aYsKsTIlIys5WMMcki0hCrp6Qr8CLQ6jbKEgp0B34FlhpjjFjZbpbjBH7GGn/9EfCEiFQBRgINjDHnRGQu4JnJtgKsMcb0uo14lVIqJ0swxtS1Xcz9I1aHxVSs9u4tY8ynaVcWkSE32M9HwPvGmBUi0gJ4w3khK3VjOkREZadiwEljTCrwNJBhfLKI3A8csw2LWI41VGId0FVE7rGtU1JEKmXxmIeAyiJS1Tb9NLDJNma5mDHme6zEv04m214Ait5gv0uBjkAvrGSb243TGGOAcUBjEakO+ACXgDjbuMFHbhDLDqDJ1TKJSGERyexsgFJKuRVjTDwwFHhZRPJjJdvPXL3ORETK29rY9UA323C/tJ0pxYAY2/u+2Rq8Umlogq2y08dAXxHZhzWs4lIm63QHImynCmsC82137hgLrBaR/VinDn2zckBjTCLQH1goIr8AqcAMrGR1pW1/W8l8DPNcYMbVixyv2+85rNOYlYwxu2zzbjtO29ju94BXjDH7sE6D/gp8hTXs5KqZwCoR2WCMicW6w8nXtuNsx/o8lVLK7Rlj9mINjetljFmN1R5ut7Xhi4CixpgDwP9hdZjsA963bf4GVnv/M3Am24NXykasTjSllFJKKaWUI2gPtlJKKaWUUg6kCbZSSimllFIOpAm2UkoppZRSDqQJtlJKKaWUUg6kCbZSSimllFIOpAm2UkoppZRSDqQJtlJKKaWUUg70/0Z9THA5nGgeAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axs = plt.subplots(1, 2, figsize=(12, 4))\n", "bcm.plot_roc_curve(ax=axs[0])\n", "bcm.plot_pr_curve(ax=axs[1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### You still have the values for the corresponding areas!" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Area under ROC Curve: 0.8464\n", "Area under PR Curve: 0.7873\n" ] } ], "source": [ "print(\"Area under ROC Curve: {:.4f}\".format(bcm.areaUnderROC))\n", "print(\"Area under PR Curve: {:.4f}\".format(bcm.areaUnderPR))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### \"I want to have a Recall of at least 50%... which threshold should I use then?\"\n", "\n", "### `getMetricsByThreshold` gives you all possible thresholds and corresponding values in a nice pandas dataframe!" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
thresholdfprrecallprecision
1000.5807680.0710380.4853800.809756
1010.5780740.0710380.4912280.811594
1020.5771530.0710380.4941520.812500
1030.5720060.0710380.5029240.815166
1040.5691740.0710380.5058480.816038
\n", "
" ], "text/plain": [ " threshold fpr recall precision\n", "100 0.580768 0.071038 0.485380 0.809756\n", "101 0.578074 0.071038 0.491228 0.811594\n", "102 0.577153 0.071038 0.494152 0.812500\n", "103 0.572006 0.071038 0.502924 0.815166\n", "104 0.569174 0.071038 0.505848 0.816038" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bcm.getMetricsByThreshold().toPandas()[100:105]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Once you choose your threshold, you can check its corresponding Confusion Matrix as well!" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DenseMatrix(2, 2, [510.0, 170.0, 39.0, 172.0], 0)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bcm.confusionMatrix(.572006)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predicted
01
Actual0510.039.0
1170.0172.0
\n", "
" ], "text/plain": [ " Predicted \n", " 0 1\n", "Actual 0 510.0 39.0\n", " 1 170.0 172.0" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bcm.print_confusion_matrix(.572006)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2.10 What else can one do using `HandySpark`?\n", "\n", "### Lots of things, these are just a few examples..." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Correlations" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "strat_corr = hdf_fenced.stratify(['Sex']).cols[:].corr()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sex index \n", "female Age 0.132313\n", " Fare 0.218466\n", " Parch -0.223644\n", " PassengerId -0.008790\n", " Pclass -0.477114\n", " SibSp -0.263284\n", " Survived 1.000000\n", " logFare 0.260685\n", "male Age -0.091223\n", " Fare 0.171288\n", " Parch 0.096318\n", " PassengerId 0.040477\n", " Pclass -0.220618\n", " SibSp -0.020238\n", " Survived 1.000000\n", " logFare 0.225389\n", "Name: Survived, dtype: float64" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strat_corr.loc[pd.IndexSlice[:, 'Survived']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Entropy" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Embarked 1.095450\n", "Pclass 1.439321\n", "Sex 0.936205\n", "Survived 0.960708\n", "Name: entropy, dtype: float64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hdf_fenced.cols[['Survived', 'Pclass', 'Embarked', 'Sex']].entropy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Stratified histograms" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAsgAAAI4CAYAAAB3OR9vAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X+0ZHV95vv3MzQSRUcknLCI0Gk1hIkwptUzJMZoMJgI0euvm1FZib9ibLlL5yaTyTUoRk1GMxijRmNCbiukMUEERRKjRCUaxcwEtFtbaAQVmebapKUbiKLRhQKf+8fZLV/a032qT1Wdvav7/VqrVlftqtr1qYYHntq1a+9UFZIkSZIW/Lu+B5AkSZKGxIIsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiStAxJtiZ5Yt9zSJImz4IsST1JsiHJd5N8q7k8u++5JOlAt6rvASTpAPdHVfWqcVaQZFVV3TmpgSTpQOcWZEkaQ5JDkvxJkn/pLn+S5JDm/pcn2d7d9xtJKsmPj7DeVyW5Ick3k1yT5KnNfb+R5PIkb0tyG/CqZvl1Sf41yd8nOWYqb1qS9nMWZEkaz5nAzwBrgZ8CTuSewnoK8NvAE4EfB07ah/V+CXgs8EDg9cC7kxzZ3P+zwLXAHPCGJP8n8P8AT+uWXQm8e7lvSpIOZBZkSRrPrwJ/UFU7qmon8PvAc7v7ngX8ZVVdU1XfBl67yPN/J8nXu8stuxZW1UVVtb2q7q6qdwNbgfnmef9fVZ1dVXdV1XeA04E/rKovdrtbvA44McmDJ/2GJWl/Z0GWpPH8KHBjc/vGbtmu+77a3Nde3+WPq+qw7nLEroVJXpDk87vKM/AfgCOa5+2+rh8D/qx5/C3A3cDRy3pXknQAsyBL0nj+hYVyusvqbhnAdu5dUEfaJzjJQ4Gzgf8L+OGqOgy4DkjzsNrtaV8FXtSU7cOq6r5VdeXob0WSBBZkSRrXBcCrkswlOQJ4NfDX3X0XAS9M8pNJ7gf83ojrvD8LBXgnkCQvZmEL8t78BXBmkp9k4UmHJfmVfXwvkiQsyJI0rtcBG4GrgKuBz3bLqKq/B94G/CNwPXBF95w79rbCqroK+FPg0yxshT6OhR/d7e057wXeDLw3ye3dPE9a1juSpANcqnb/lk6SNA3d1t0twCEet1iShsstyJI0RUme0R0r+UHAG4C/sxxL0rBZkCVpul4C7AC+AtzFwg/vJEkD5i4WkiRJUsMtyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCPIOS3JVkc5ItSd6b5H57eexrk/zOCs72n5Nck+TuJPMr9brSkA08s29Mcl2Sq5JckuSwlXptaYgGntf/3mV1c5KPJvnRlXrtA40FeTZ9p6rWVtUJwHeB0/seqLEFeCZwed+DSAMy5MxeBpxQVY8AvgS8oud5pL4NOa9vrKpHVNVa4IPAq/seaH9lQZ59nwJ+HCDJ87pPlp9P8le7PzDJi5N8prv/4l2firutvlu65Zd3y45P8unuU+pVSY4dZZiquraqvjjB9yftb4aW2Y9W1Z3dzSuAoyfyLqX9w9Dyentz81Cgxn6HWtSqvgfQ8iVZBZwKfDjJ8cCrgJ+tqluSHL7IU95fVe/onvs64EXAn7LwCfRJVXVT8/Xq6cBbq+r8JPcBDuqe9yngAYus+3eq6h8m+f6k/c0MZPbXgQv
He5fS/mGoeU3yeuB5wDeAJ0zo7Wo3FuTZdN8km7vrnwLOAV4CvLeqbgGoqtsWed4JXWgPA+4PfKRb/j+BDUkuAt7fLftn4MwkR7MQ+i93633cNN6QtJ8bfGaTnAncCZy/r29O2s8MOq9VdWb33FcALwNes4z3qCVYkGfTd7r9j74vySjP2wA8vao+n+QFwEkAVXV6kp8GngxsSvLoqnp3kiu7ZZcmeUlVfdwtyNKyDDqz3bqfApxcVX5lqwPdoPPaOB+4FAvyVFiQ9x8fBy5J8uaqujXJ4Yt8wn0AsD3JwcCvAjcBJHlYVV0JXJnkVOCYJA8EbqiqtyVZDTwC+LhbkKWJGURmk5wCvBz4+ar69mTforTfGEpej921tRl4GnDd5N6iWhbk/URVXdPtl/TJJHcBnwNesNvDfg+4EtjZ/bnrU+obux8IBPgY8Hngd4HnJvke8DXgD0eZI8kzWNjnag74UJLNVfWkcd6btD8aSmaBtwOHAJd1W8muqKoh/Wpf6t2A8npWkuOAu4EbGdYRNvYr8ds0SZIk6R4e5k2SJElqWJAlSZKkhgVZkiRJaliQByrJmUmuyT3nXP/pKbzGIUn+oVv/sye9/uZ1tiY5Ylrrl4bAzEqzw7xqKR7FYoCSPIaFY5I+qqru6P7Fv88UXuqRALsf71HSvjGz0uwwrxqFW5CH6Sjglqq6A6Cqbqmqf0ny6CSfTLIpyUeSHJVkVRbO/X4SQJL/0R2KZq+S/Ajw18B/6j7dPmyx9XeP/USStyTZmOTaJP8pyfuTfDkLZw3atc6/6Z57TZJ1e3jdX8s955//f5McNP5fl9Q7MyvNDvOqpVWVl4FdWDhF5WbgS8CfAz8PHAz8L2Cue8yzgXO768cD1wJPZOHYjPfplr+lW8/ulzO6+08CPthd39v6PwG8obv+m8C/sPAfmEOAbcAPd/cd3v15X2BLs3wrcATwk8DfAQd3y/8ceF7ff99evIx7MbNevMzOxbx6GeXiLhYDVFXfSvJo4HHAE4ALgdcBJ3DPwfwPArZ3j78myV8BHwQeU1Xf7Zb/13142eP2tP7OB7o/rwauqartAEluAI4BbgX+7yycKIRu2bHd8l1OBh4NfKZ7jfsCO/ZhRmmQzKw0O8yrRmFBHqiquouFT5WfSHI18FIWQvOYPTzlPwJfB35k14Ikb2Eh/Lt7T1WdtduyLLH+O7o/726u77q9qvv66Yks/Mfj20k+AfzQIq9xXlW9Yg+vIc0sMyvNDvOqpbgP8gAlOS4Lp6XcZS0LX+/MZeHHBSQ5OMnx3fVnAocDjwf+NMlhsPDptqrWLnLZPbgAX9zT+kf0QOBfu+D+B+BnFnnMx4Bf6fbNIsnhSX5sH15DGiQzK80O86pRWJCH6f7AeUm+kOQq4OHAq4FfAd6Q5PMs7Of0s1n49e1ZwG9U1ZeAtwNv3dcX7L4y+oH178MqPszCp9xru3muWOQ1vgC8Cvho974uY2E/K2nWmVlpdphXLSm1sCO3JEmSJNyCLEmSJN2LBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpMYgThRyxBFH1Jo1a/oeQxqETZs23VJVc33PsTdmVrrH0DNrXqV7jJrXQRTkNWvWsHHjxr7HkAYhyY19z7AUMyvdY+iZNa/SPUbNq7tYSJIkSQ0LsiRJktSwIEuSJEkNC7IkSZLUsCBLkiRJjSULcpJzk+xIsqVZdmGSzd1la5LN3fI1Sb7T3PcX0xxe0g8ys9LsMK/SMI1ymLcNwNuBd+1aUFXP3nU9yZuAbzSP/0pVrZ3UgJL22QbMrDQrNmBepcFZsiBX1eVJ1ix2X5IAzwJ+YbJjSVouMyvNDvMqDdO4+yA/Dri5qr7cLHtIks8l+WSSx425fkmTZWal2WFepZ6Meya904ALmtvbgdVVdWuSRwN/k+T4qrp99ycmWQesA1i9evWYY0gakZmVZod5lXqy7C3ISVYBzwQu3LWsqu6oqlu765uArwA/sdjzq2p
9Vc1X1fzc3GBPYS/tN8ysNDvMq9SvcXaxeCJwXVVt27UgyVySg7rrDwWOBW4Yb0RJE2JmpdlhXqUejXKYtwuAfwaOS7ItyYu6u57Dvb/6AXg8cFV3SJr3AadX1W2THFjS3plZaXaYV2mYRjmKxWl7WP6CRZZdDFw8/liSlsvMSrPDvErD5Jn0JEmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpMaSBTnJuUl2JNnSLHttkpuSbO4uv9zc94ok1yf5YpInTWtwSYszs9LsMK/SMI2yBXkDcMoiy99SVWu7y6UASR4OPAc4vnvOnyc5aFLDShrJBsysNCs2YF6lwVmyIFfV5cBtI67vacB7quqOqvrfwPXAiWPMJ2kfmVlpdphXaZjG2Qf5ZUmu6r4eelC37MHAV5vHbOuWSeqfmZVmh3mVerTcgnw28DBgLbAdeNO+riDJuiQbk2zcuXPnMseQNCIzK80O8yr1bFkFuapurqq7qupu4B3c8xXPTcAxzUOP7pYtto71VTVfVfNzc3PLGUPSiMysNDvMq9S/ZRXkJEc1N58B7Pr17QeA5yQ5JMlDgGOBT483oqRxmVlpdphXqX+rlnpAkguAk4AjkmwDXgOclGQtUMBW4CUAVXVNkouALwB3Ai+tqrumM7qkxZhZaXaYV2mYUlV9z8D8/Hxt3Lix7zGkQUiyqarm+55jb8ysdI+hZ9a8SvcYNa+eSU+SJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqLFmQk5ybZEeSLc2yNya5LslVSS5Jcli3fE2S7yTZ3F3+YprDS/pBZlaaHeZVGqZRtiBvAE7ZbdllwAlV9QjgS8Armvu+UlVru8vpkxlT0j7YgJmVZsUGzKs0OEsW5Kq6HLhtt2Ufrao7u5tXAEdPYTZJy2BmpdlhXqVhmsQ+yL8O/H1z+yFJPpfkk0keN4H1S5osMyvNDvMq9WDVOE9OciZwJ3B+t2g7sLqqbk3yaOBvkhxfVbcv8tx1wDqA1atXjzOGpBGZWWl2mFepP8vegpzkBcBTgF+tqgKoqjuq6tbu+ibgK8BPLPb8qlpfVfNVNT83N7fcMSSNyMxKs8O8Sv1aVkFOcgrwcuCpVfXtZvlckoO66w8FjgVumMSgkpbPzEqzw7xK/VtyF4skFwAnAUck2Qa8hoVf1B4CXJYE4Iru17SPB/4gyfeAu4HTq+q2RVcsaSrMrDQ7zKs0TEsW5Ko6bZHF5+zhsRcDF487lKTlM7PS7DCv0jB5Jj1JkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpMVJBTnJukh1JtjTLDk9yWZIvd38+qFueJG9Lcn2Sq5I8alrDS/pB5lWaHeZVGqZRtyBvAE7ZbdkZwMeq6ljgY91tgFOBY7vLOuDs8ceUtA82YF6lWbEB8yoNzkgFuaouB27bbfHTgPO66+cBT2+Wv6sWXAEcluSoSQwraWnmVZod5lUapnH2QT6yqrZ3178GHNldfzDw1eZx27plkvpjXqXZYV6lnk3kR3pVVUDty3OSrEuyMcnGnTt3TmIMSSNYTl7BzEp9MK9SP8YpyDfv+mqn+3NHt/wm4JjmcUd3y+6lqtZX1XxVzc/NzY0xhqQRjJV
XMLPSCjKvUs/GKcgfAJ7fXX8+8LfN8ud1v7b9GeAbzVdFkvphXqXZYV6lnq0a5UFJLgBOAo5Isg14DXAWcFGSFwE3As/qHn4p8MvA9cC3gRdOeGZJe2FepdlhXqVhGqkgV9Vpe7jr5EUeW8BLxxlK0vKZV2l2mFdpmDyTniRJktSwIEuSJEkNC7IkSZLUsCBLkiRJDQuyJEmS1LAgS5IkSQ0LsiRJktSwIEuSJEkNC7IkSZLUsCBLkiRJDQuyJEmS1LAgS5IkSQ0LsiRJktSwIEuSJEkNC7IkSZLUsCBLkiRJDQuyJEmS1LAgS5IkSQ0LsiRJktRYtdwnJjkOuLBZ9FDg1cBhwIuBnd3yV1bVpcueUNJEmFlpdphXqV/LLshV9UVgLUCSg4CbgEuAFwJvqao/nsiEkibCzEqzw7xK/ZrULhYnA1+pqhsntD5J02VmpdlhXqUVNqmC/Bzggub2y5JcleTcJA+a0GtImhwzK80O8yqtsLELcpL7AE8F3tstOht4GAtfDW0H3rSH561LsjHJxp07dy72EElTYGal2WFepX5MYgvyqcBnq+pmgKq6uaruqqq7gXcAJy72pKpaX1XzVTU/Nzc3gTEkjcjMSrPDvEo9mERBPo3mq58kRzX3PQPYMoHXkDQ5ZlaaHeZV6sGyj2IBkORQ4BeBlzSL/yjJWqCArbvdJ6lHZlaaHeZV6s9YBbmq/g344d2WPXesiSRNjZmVZod5lfrjmfQkSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkxqpxV5BkK/BN4C7gzqqaT3I4cCGwBtgKPKuq/nXc15I0HvMqzQ7zKvVnUluQn1BVa6tqvrt9BvCxqjoW+Fh3W9IwmFdpdphXqQfT2sXiacB53fXzgKdP6XUkjc+8SrPDvEorYBIFuYCPJtmUZF237Miq2t5d/xpw5AReR9L4zKs0O8yr1JOx90EGfq6qbkryI8BlSa5r76yqSlK7P6kL+zqA1atXT2AMSSNYVl7BzEo9MK9ST8beglxVN3V/7gAuAU4Ebk5yFED3545Fnre+quaran5ubm7cMSSNYLl57Z5jZqUVZF6l/oy1BTnJocC/q6pvdtd/CfgD4APA84Gzuj//dtxBJY3HvEqzY1bzuuaMD01kPVvPevJE1iMt17i7WBwJXJJk17reXVUfTvIZ4KIkLwJuBJ415utIGp95lWaHeZV6NFZBrqobgJ9aZPmtwMnjrFvSZJlXaXaYV6lfnklPkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqTOJMepIkSYPiMZk1DrcgS5IkSQ0LsiRJktSwIEuSJEkNC7IkSZLUsCBLkiRJDQuyJEmS1LAgS5IkSQ0LsiRJktTwRCGaOA/OLkmSZplbkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaiz7R3pJjgHeBRwJFLC+qt6a5LXAi4Gd3UNfWVWXjjuopPGY2eHwh6xainmV+jXOUSzuBP5bVX02yQOATUku6+57S1X98fjjSZogMyvNDvMq9WjZBbmqtgPbu+vfTHIt8OBJDSZpsg7kzLrFVrPmQM4rTC6z0nJNZB/kJGuARwJXdoteluSqJOcmedAkXkPS5JhZaXaYV2nljV2Qk9wfuBj4raq6HTgbeBiwloVPv2/aw/PWJdmYZOPOnTsXe4ikKTCz0uwwr1I/xirISQ5mIbjnV9X7Aarq5qq6q6ruBt4BnLjYc6tqfVXNV9X83NzcOGNIGpGZlWaHeZX6s+yCnCTAOcC1VfXmZvlRzcOeAWxZ/niSJsXMSrPDvEr9GucoFo8FngtcnWRzt+yVwGlJ1rJwWJqtwEvGmlAHLH9YNXFmVpod5nUg/H/RgWmco1j
8E5BF7vJ4jNIAmVlpdphXqV+eSU+SJElqWJAlSZKkxjj7IGs/44HZJUmS3IIsSZIk3YsFWZIkSWpYkCVJkqSG+yBL0ojcT1+SDgxuQZYkSZIaFmRJkiSpYUGWJEmSGu6DrP3epPYb3XrWkyeyHkmSNGxuQZYkSZIaFmRJkiSpYUGWJEmSGu6DvB/w2KySJEmT4xZkSZIkqeEWZEmSpCnziEqzxS3IkiRJUsMtyJI0o9wiJUnTMbUtyElOSfLFJNcnOWNaryNpfOZVmh3mVZq+qRTkJAcBfwacCjwcOC3Jw6fxWpLGY16l2WFepZUxrV0sTgSur6obAJK8B3ga8IUpvZ6k5TOvB7hJ7KrhbhorxrxKK2BaBfnBwFeb29uAn57Sa0krYj8uEeZVmh3mVVoBvf1IL8k6YF1381tJbgVu6WuePTiCYc3kPEsb2kz3midvGOk5PzatYcYxA5kd9D/7gZjaTCP+u727of0dLXeewWXWvC7L0GZadJ5lZm0Shvb3A8ubaaS8Tqsg3wQc09w+ulv2fVW1Hli/63aSjVU1P6V5lmVoMznP0oY209Dm2YMl8wrDz6zzLG1oMznPspjXKRnaTM6ztGnONK2jWHwGODbJQ5LcB3gO8IEpvZak8ZhXaXaYV2kFTGULclXdmeRlwEeAg4Bzq+qaabyWpPGYV2l2mFdpZUxtH+SquhS4dB+esn7ph6y4oc3kPEsb2kxDm2dRy8grDO+9Oc/ShjaT8yyDeZ2aoc3kPEub2kypqmmtW5IkSZo5UzuTniRJkjSLei/ISc5NsiPJlr5nAUhyTJJ/TPKFJNck+c0BzPRDST6d5PPdTL/f90ywcEanJJ9L8sEBzLI1ydVJNifZ2Pc8AEkOS/K+JNcluTbJY/qeaRLM7JLzmNcRDC2z5nVlmNfRmNcl55l6XnvfxSLJ44FvAe+qqhN6HWZhnqOAo6rqs0keAGwCnl5VvZ2lKEmAQ6vqW0kOBv4J+M2quqKvmbq5fhuYB/59VT2l51m2AvNVNZhjNCY5D/hUVb2z+7X5/arq633PNS4zu+Q85nW0ebYyoMya1xWbx7yONpd53YuVyGvvW5Cr6nLgtr7n2KWqtlfVZ7vr3wSuZeHMRX3OVFX1re7mwd2l1082SY4Gngy8s885hirJA4HHA+cAVNV394f/2YKZHWEe8zpjzOvKMa9LM697t1J57b0gD1mSNcAjgSv7neT7X7dsBnYAl1VV3zP9CfBy4O6e59ilgI8m2ZSFM0j17SHATuAvu6/J3pnk0L6H2t8NJbPmdSRDyqx57YF53SPzuncrklcL8h4kuT9wMfBbVXV73/NU1V1VtZaFsyadmKS3r8qSPAXYUVWb+pphET9XVY8CTgVe2n2t2KdVwKOAs6vqkcC/AWf0O9L+bUiZNa8jGVJmzesKM6+LM68jWZG8WpAX0e2HdDFwflW9v+95Wt3XCP8InNLjGI8Fntrtk/Qe4BeS/HWP81BVN3V/7gAuAU7scx5gG7Ct2RLxPhYCrSkYambN654NLLPmdQWZ170yr0tbkbxakHfT7bB/DnBtVb2573kAkswlOay7fl/gF4Hr+pqnql5RVUdX1RoWTnP68ar6tb7mSXJo92MPuq9Zfgno9RfbVfU14KtJjusWnQz09kPP/dnQMmtelza0zJrXlWNe9868Lm2l8jq1M+mNKskFwEnAEUm2Aa+pqnN6HOmxwHOBq7t9kgBe2Z25qC9HAeclOYiFDzUXVdUgDv0yEEcClyz8d5dVwLur6sP9jgTAfwHO735hewPwwp7nmQgzuyTzurQhZta8rgzzOnsOyLz2fpg3SZIkaUjcxUKSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBXkGJbkryeYkW5K8N8n99vLY1yb5nRWc7T8nuSbJ3UnmV+p1pSEbeGbfmOS6JFcluWTXSROkA9XA8/rfu6xuTvLRJD+6Uq99oLEgz6bvVNXaqjoB+C5
wet8DNbYAzwQu73sQaUCGnNnLgBOq6hHAl4BX9DyP1Lch5/WNVfWIqloLfBB4dd8D7a8syLPvU8CPAyR5XvfJ8vNJ/mr3ByZ5cZLPdPdfvOtTcbfVd0u3/PJu2fFJPt19Sr0qybGjDFNV11bVFyf4/qT9zdAy+9GqurO7eQVw9ETepbR/GFpeb29uHgp4trcp6f1U01q+JKuAU4EPJzkeeBXws1V1S5LDF3nK+6vqHd1zXwe8CPhTFj6BPqmqbmq+Xj0deGtV7TqV40Hd8z4FPGCRdf9OVf3DJN+ftL+Zgcz+OnDheO9S2j8MNa9JXg88D/gG8IQJvV3txoI8m+7bnMP+U8A5wEuA91bVLQBVddsizzuhC+1hwP2Bj3TL/yewIclFwPu7Zf8MnJnkaBZC/+VuvY+bxhuS9nODz2ySM4E7gfP39c1J+5lB57Wqzuye+wrgZcBrlvEetQQL8mz6Trf/0fclGeV5G4CnV9Xnk7wAOAmgqk5P8tPAk4FNSR5dVe9OcmW37NIkL6mqj7sFWVqWQWe2W/dTgJOryq9sdaAbdF4b5wOXYkGeCgvy/uPjwCVJ3lxVtyY5fJFPuA8Atic5GPhV4CaAJA+rqiuBK5OcChyT5IHADVX1tiSrgUcAH3cLsjQxg8hsklOAlwM/X1XfnuxblPYbQ8nrsbu2NgNPA66b3FtUy4K8n6iqa7r9kj6Z5C7gc8ALdnvY7wFXAju7P3d9Sn1j9wOBAB8DPg/8LvDcJN8Dvgb84ShzJHkGC/tczQEfSrK5qp40znuT9kdDySzwduAQ4LJuK9kVVTWkX+1LvRtQXs9KchxwN3AjwzrCxn4lfpsmSZIk3cPDvEmSJEkNC7IkSZLUsCBLkiRJDQvyDElyZpJrcs952H+675l2l2RDkl/pew5pCMysNDvMq1oexWJGJHkMC8cpfVRV3ZHkCOA+PY8laQ/MrDQ7zKt25xbk2XEUcEtV3QFQVbdU1b8keXSSTybZlOQjSY5KsioL54M/CSDJ/+gOT7OkJK9Ncl6STyW5Mckzk/xRkquTfLg7viNJXt29xpYk67PIUdQXm21yfx3S4JlZaXaYV91bVXmZgQsLp63cDHwJ+HPg54GDgf+U/zXJAAAUmUlEQVQFzHWPeTZwbnf9eOBa4IksHK/xPt3yt3Tr2f1yRnf/a4F/6tb9U8C3gVO7+y5h4SxBAIc3s/0V8H901zcAv7K32bx4ORAuZtaLl9m5mFcvu1/cxWJGVNW3kjwaeBzwBOBC4HXACdxzgP+DgO3d469J8lfAB4HHVNV3u+X/dYSX+/uq+l6Sq7t1frhbfjWwprv+hCQvB+4HHA5cA/xds47j9jSbdCAws9LsMK/anQV5hlTVXcAngE90wXopcE1VPWYPT/mPwNeBH9m1IMlbWAj/7t5TVWd113d9xXR3ku9V9/GUhTP3rEryQyx8wp6vqq8meS3wQ7utL0vMJu33zKw0O8yrWhbkGZHu1JJ1zznY17Lw9c4vJXlMVf1zt+/ST3SfbJ/JwqfOxwMfTHJiVX19xE+3S9kV1FuS3J+Fr3vet9tjvgjMLTbbBF5fGjwzK80O86rdWZBnx/2BP01yGHAncD2wDlgPvC3JA1n45/knSW4GzgJO7j59vh14K/D8SQxSVV9P8g5gCwvnkP/MIo/5bhYORXOv2Vj4mkg6EJhZaXaYV91L7tmyL0mSJMnDvEmSJEkNC7IkSZLUsCBLkiRJDQuyJEmS1LAgS5IkSY1BHObtiCOOqDVr1vQ9hjQImzZtuqWq5vqeY2/MrHSPoWfWvEr3GDWvgyjIa9asYePGjX2PIQ1Ckhv7nmEpZla6x9Aza16le4yaV3exkCRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGksW5CTnJtmRZEuz7MIkm7vL1iSbu+Vrknynue8vpjm8pB9kZqXZYV6lYRrlMG8bgLcD79q1oKqevet6kjcB32ge/5WqWjupASXtsw2YWWl
WbMC8SoOzZEGuqsuTrFnsviQBngX8wmTHkrRcZlaaHeZVGqZxTxTyOODmqvpys+whST4H3A68qqo+NeZrSJocMyvNjpnL65ozPjSR9Ww968kTWY+0XOMW5NOAC5rb24HVVXVrkkcDf5Pk+Kq6ffcnJlkHrANYvXr1mGNIGpGZlWaHeZV6suyjWCRZBTwTuHDXsqq6o6pu7a5vAr4C/MRiz6+q9VU1X1Xzc3ODPYW9tN8ws9LsMK9Sv8Y5zNsTgeuqatuuBUnmkhzUXX8ocCxww3gjSpoQMyvNDvMq9WiUw7xdAPwzcFySbUle1N31HO791Q/A44GrukPSvA84vapum+TAkvbOzEqzw7xKwzTKUSxO28PyFyyy7GLg4vHHkrRcZlaaHeZVGibPpCdJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1lizISc5NsiPJlmbZa5PclGRzd/nl5r5XJLk+yReTPGlag0tanJmVZod5lYZplC3IG4BTFln+lqpa210uBUjycOA5wPHdc/48yUGTGlbSSDZgZqVZsQHzKg3OkgW5qi4HbhtxfU8D3lNVd1TV/wauB04cYz5J+8jMSrPDvErDNM4+yC9LclX39dCDumUPBr7aPGZbt0xS/8ysNDvMq9Sj5Rbks4GHAWuB7cCb9nUFSdYl2Zhk486dO5c5hqQRmVlpdphXqWfLKshVdXNV3VVVdwPv4J6veG4CjmkeenS3bLF1rK+q+aqan5ubW84YkkZkZqXZYV6l/i2rICc5qrn5DGDXr28/ADwnySFJHgIcC3x6vBEljcvMSrPDvEr9W7XUA5JcAJwEHJFkG/Aa4KQka4ECtgIvAaiqa5JcBHwBuBN4aVXdNZ3RJS3GzEqzw7xKw7RkQa6q0xZZfM5eHv964PXjDCVp+cysNDvMqzRMnklPkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaliQJUmSpIYFWZIkSWpYkCVJkqSGBVmSJElqWJAlSZKkhgVZkiRJaixZkJOcm2RHki3NsjcmuS7JVUkuSXJYt3xNku8k2dxd/mKaw0v6QWZWmh3mVRqmUbYgbwBO2W3ZZcAJVfUI4EvAK5r7vlJVa7vL6ZMZU9I+2ICZlWbFBsyrNDhLFuSquhy4bbdlH62qO7ubVwBHT2E2SctgZqXZYV6lYZrEPsi/Dvx9c/shST6X5JNJHjeB9UuaLDMrzQ7zKvVg1ThPTnImcCdwfrdoO7C6qm5N8mjgb5IcX1W3L/LcdcA6gNWrV48zhqQRmVlpdphXqT/L3oKc5AXAU4BfraoCqKo7qurW7vom4CvATyz2/KpaX1XzVTU/Nze33DEkjcjMSrPDvEr9WlZBTnIK8HLgqVX17Wb5XJKDuusPBY4FbpjEoJKWz8xKs8O8Sv1bcheLJBcAJwFHJNkGvIaFX9QeAlyWBOCK7te0jwf+IMn3gLuB06vqtkVXLGkqzKw0O8yrNExLFuSqOm2Rxefs4bEXAxePO5Sk5TOz0uwwr9IweSY9SZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqTFSQU5ybpIdSbY0yw5PclmSL3d/PqhbniRvS3J9kquSPGpaw0v6QeZVmh3mVRqmUbcgbwBO2W3ZGcDHqupY4GPdbYBTgWO7yzrg7PHHlLQPNmBepVmxAfM
qDc5IBbmqLgdu223x04DzuuvnAU9vlr+rFlwBHJbkqEkMK2lp5lWaHeZVGqZx9kE+sqq2d9e/BhzZXX8w8NXmcdu6ZZL6Y16l2WFepZ5N5Ed6VVVA7ctzkqxLsjHJxp07d05iDEkjWE5ewcxKfTCvUj/GKcg37/pqp/tzR7f8JuCY5nFHd8vuparWV9V8Vc3Pzc2NMYakEYyVVzCz0goyr1LPxinIHwCe311/PvC3zfLndb+2/RngG81XRZL6YV6l2WFepZ6tGuVBSS4ATgKOSLINeA1wFnBRkhcBNwLP6h5+KfDLwPXAt4EXTnhmSXthXqXZYV6lYRqpIFfVaXu46+RFHlvAS8cZStLymVdpdphXaZg8k54kSZLUsCBLkiRJDQuyJEmS1LAgS5IkSQ0LsiRJktSwIEuSJEkNC7IkSZLUsCBLkiRJDQuyJEmS1LAgS5IkSQ0LsiRJktSwIEuSJEkNC7IkSZLUsCBLkiRJjVV9DyBJktRac8aHxl7H1rOePIFJdKByC7IkSZLUsCBLkiRJDQuyJEmS1LAgS5IkSY1l/0gvyXHAhc2ihwKvBg4DXgzs7Ja/sqouXfaEkibCzEqzw7xK/Vp2Qa6qLwJrAZIcBNwEXAK8EHhLVf3xRCaUNBFmVpod5lXq16R2sTgZ+EpV3Tih9UmaLjMrzQ7zKq2wSRXk5wAXNLdfluSqJOcmedCEXkPS5JhZaXaYV2mFjV2Qk9wHeCrw3m7R2cDDWPhqaDvwpj08b12SjUk27ty5c7GHSJoCMyvNDvMq9WMSW5BPBT5bVTcDVNXNVXVXVd0NvAM4cbEnVdX6qpqvqvm5ubkJjCFpRGZWmh3mVerBJAryaTRf/SQ5qrnvGcCWCbyGpMkxs9LsMK9SD5Z9FAuAJIcCvwi8pFn8R0nWAgVs3e0+ST0ys9LsMK9Sf8YqyFX1b8AP77bsuWNNJGlqzKw0O8yr1B/PpCdJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDUsyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUsOCLEmSJDVW9T2AJE3bmjM+NJH1bD3ryRNZjyRp2NyCLEmSJDXG3oKcZCvwTeAu4M6qmk9yOHAhsAbYCjyrqv513NeSNB7zKs0O8yr1Z1JbkJ9QVWurar67fQbwsao6FvhYd1vSMJhXaXaYV6kH09oH+WnASd3184BPAL87pdc64Ll/pcZkXqXZYV6lFTCJLcgFfDTJpiTrumVHVtX27vrXgCMn8DqSxmdepdlhXqWeTGIL8s9V1U1JfgS4LMl17Z1VVUlq9yd1YV8HsHr16gmMIWkEy8ormFmpB+ZV6snYW5Cr6qbuzx3AJcCJwM1JjgLo/tyxyPPWV9V8Vc3Pzc2NO4akESw3r91zzKy0gsyr1J+xCnKSQ5M8YNd14JeALcAHgOd3D3s+8LfjvI6k8ZlXaXaYV6lf4+5icSRwSZJd63p3VX04yWeAi5K8CLgReNaYryNpfOZVmh3mVerRWAW5qm4AfmqR5bcCJ4+zbkmTZV6l2WFepX55Jj1JkiSpYUGWJEmSGhZkSZIkqTGtM+lJkiT1xrPMahxuQZYkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSpYUGWJEmSGquW+8QkxwDvAo4EClhfVW9N8lrgxcDO7qGvrKpLxx1U0njM7HCsOeNDE1nP1rOePJH1aHjMq9SvZRdk4E7gv1XVZ5M8ANiU5LLuvrdU1R+PP56kCTKz0uwwr1KPll2Qq2o7sL27/s0k1wIPntRgkibLzEqzw7xK/RpnC/L3JVkDPBK4Engs8LIkzwM2svAJ+F8n8TqaLr/2PXCYWWl2mFdp5Y3
9I70k9wcuBn6rqm4HzgYeBqxl4dPvm/bwvHVJNibZuHPnzsUeImkKzKw0O8yr1I+xCnKSg1kI7vlV9X6Aqrq5qu6qqruBdwAnLvbcqlpfVfNVNT83NzfOGJJGZGal2WFepf4suyAnCXAOcG1VvblZflTzsGcAW5Y/nqRJMbPS7DCvUr/G2Qf5scBzgauTbO6WvRI4LclaFg5LsxV4yVgTSpoUMyvNDvMq9Wico1j8E5BF7vJ4jNIAmVlpdphXqV8TOYqFJEnS/mhSR3iaFI8UtTI81bQkSZLUcAuyJs7jKUuSpFnmFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSp4XGQNVgeT1lDM7QzakmSpsMtyJIkSVLDgixJkiQ1LMiSJElSw4IsSZIkNSzIkiRJUuOAO4qFR0Y48PjPvB+T+Hv371yS1IcDriBLku7NDzPyEIbSvbmLhSRJktSY2hbkJKcAbwUOAt5ZVWdN67Ukjce8zia3+h2YzKs0fVPZgpzkIODPgFOBhwOnJXn4NF5L0njMqzQ7zKu0Mqa1BflE4PqqugEgyXuApwFfmNLrSVO3H++naV6l2WFepRUwrX2QHwx8tbm9rVsmaXjMqzQ7zKu0Ano7ikWSdcC67ua3ktwK3NLXPHtwBHuYKW9Y4UkW7HGengxtHhjeTPeaZ8R/b35sWsOMo4/M7mPOBv3PfiCmNtMy/5s4tL+j5c4zuMzOwP9jh/bPHoY306Lz9NQ/YHh/P7C8mUbK67QK8k3AMc3to7tl31dV64H1u24n2VhV81OaZ1mGNpPzLG1oMw1tnj1YMq8w/Mw6z9KGNpPzLIt5nZKhzeQ8S5vmTNPaxeIzwLFJHpLkPsBzgA9M6bUkjce8SrPDvEorYCpbkKvqziQvAz7CwmFozq2qa6bxWpLGY16l2WFepZUxtX2Qq+pS4NJ9eMr6pR+y4oY2k/MsbWgzDW2eRS0jrzC89+Y8SxvaTM6zDOZ1aoY2k/MsbWozpaqmtW5JkiRp5niqaUmSJKnRe0FOcm6SHUm29D0LQJJjkvxjki8kuSbJbw5gph9K8ukkn+9m+v2+Z4KFMzol+VySDw5glq1Jrk6yOcnGvucBSHJYkvcluS7JtUke0/dMk2Bml5zHvI5gaJk1ryvDvI7GvC45z9Tz2vsuFkkeD3wLeFdVndDrMAvzHAUcVVWfTfIAYBPw9Krq7SxFSQIcWlXfSnIw8E/Ab1bVFX3N1M3128A88O+r6ik9z7IVmK+qwRyjMcl5wKeq6p3dr83vV1Vf73uucZnZJecxr6PNs5UBZda8rtg85nW0uczrXqxEXnvfglxVlwO39T3HLlW1vao+213/JnAtPZ+lqBZ8q7t5cHfp9ZNNkqOBJwPv7HOOoUryQODxwDkAVfXd/eF/tmBmR5jHvM4Y87pyzOvSzOverVReey/IQ5ZkDfBI4Mp+J/n+1y2bgR3AZVXV90x/ArwcuLvnOXYp4KNJNmXhDFJ9ewiwE/jL7muydyY5tO+h9ndDyax5HcmQMmtee2Be98i87t2K5NWCvAdJ7g9cDPxWVd3e9zxVdVdVrWXhrEknJuntq7IkTwF2VNWmvmZYxM9V1aOAU4GXdl8r9mkV8Cjg7Kp6JPBvwBn9jrR/G1JmzetIhpRZ87rCzOvizOtIViSvFuRFdPshXQycX1Xv73ueVvc1wj8Cp/Q4xmOBp3b7JL0H+IUkf93jPFTVTd2fO4BLgBP7nAfYBmxrtkS8j4VAawqGmlnzumcDy6x5XUHmda/M69JWJK8W5N10O+yfA1xbVW/uex6AJHNJDuuu3xf4ReC6vuapqldU1dFVtYaF05x+vKp+ra95khza/diD7muWXwJ6/cV2VX0N+GqS47pFJwO9/dBzfza0zJrXpQ0ts+Z15ZjXvTOvS1upvE7tTHqjSnIBcBJwRJJtwGuq6pweR3os8Fzg6m6fJIBXdmcu6stRwHlJDmLhQ81
FVTWIQ78MxJHAJQv/3WUV8O6q+nC/IwHwX4Dzu1/Y3gC8sOd5JsLMLsm8Lm2ImTWvK8O8zp4DMq+9H+ZNkiRJGhJ3sZAkSZIaFmRJkiSpYUGWJEmSGhZkSZIkqWFBliRJkhoWZEmSJKlhQZYkSZIaFmRJkiSp8f8DV4fsjzqUlBsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axs = hdf_fenced.stratify(['Pclass', 'Sex']).cols['logFare'].hist(figsize=(10, 8))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Stratified scatterplots" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1gAAAGqCAYAAAAWWuWTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3XecZFWZ//HPqVzVOc4wOZKHOGRBgogERRSzElbBBGJgdc3urqs/XVdREBUDCIIi0YRkYRjCwAxpcs6xc+V8fn9UTXXXdE9P9Ux1nO/79eoXXXXr3vvc6p6Hevqe8xxjrUVEREREREQOnGO4AxARERERERkrVGCJiIiIiIiUiQosERERERGRMlGBJSIiIiIiUiYqsERERERERMpEBZaIiIiIiEiZqMASEZERxxhjjTGzhjsOERGRgVKBJSIi/TLGPGOM6TDGeIc7loHKxx43xoR7fJ023HGJiMjYpQJLRET2yhgzDTgTsMC7hjWY/Xedtbayx9eLAz2AMcY1GIGJiMjYowJLRET6cwXwEnAHcGXPDcaYBmPM34wxQWPMK8aY7xpj5vfYfrgx5gljTLsxZqUx5v37E4AxpsYYc6cxpsUYs9EY8w1jjCO/zWmM+T9jTKsxZr0x5rr88MJ9FkTGmFuMMVt6xH96j23fNcbca4z5ozEmBHzUGOMwxnzNGLM2f74/GWPq9ueaRERk7FKBJSIi/bkCuDv/dYExZlyPbT8HIsB4csVXoQAzxlQATwD3AM3AB4FbjTFH7kcMNwM1wAzgrfmYrs5vuwa4EDgOOAF49wCOuwA4BqgH7gfu22MY5GX5+GuAe4EvABcDZwGTgDDws/24HhERGcOMtXa4YxARkRHIGPMW4F/AIdbaVmPMCuBX1tqfGGOcQBw42lq7Mv/67wJnW2vfYoz5ALmheWf2ON6vgG3W2v8s4dwWmA2sB2LAcdbaZfltnwQ+ZK092xjzNHCvtfZX+W1vI1fYua21aWPMM8DJQDJ/6HXW2hP6OJ8BgsCp1tql+Ws53Vp7bo/XrAY+Ya19Nv94MrAaCFhrs6W8pyIiMvbpDpaIiOzNlcDj1trW/ON76L5L1QS4gM09Xt/z+6nAKcaYzt1fwEfI3e0aiEbADWzs8dxGYGL++wn9xLDb56y1tfmvQnFljPmyMWaFMaYL6AAq8ufb27GmAH/rcT2L8883D/CaRERkDNOkXRER6cUY4wfeDziNMTvyT3uBWmPMscASIE1uqNyq/PbJPQ6xGXjWWnv+AYbSCqTIFWzL8s9NAbbmv9+ej2G3njHslTHmHOCLwHk9jtsFmB4v23OIxxbgw9baBaUGLyIiBx/dwRIRkb68G8gAR5Kb33QccATwHHCFtTYDPAh8xxgTMMYcTm5u1G5/Bw41xnzMGOPOf51kjDkCwBhzlTFmw76CyJ/nz8D/GGOqjDFTyRVGf8i/5M/ADcaYicaYWuArJV5fFbkCsZXcHbLvkLuD1Z9fAt8zxkzJX0OzMWa0dlYUEZFBogJLRET6ciVwu7V2k7V2x+4v4BbgI/kufdeRawCxA7gL+COQALDWhoC3k2tusS3/
mh+QuwsGuTtNz5cYy/XkmmmsA+aTG6r4u/y2XwOPA28CrwGPkCucMvs45iPAk+TmUG0gN/9q+z72+THwKPBUvrPgC8BJJV6DiIgcJNTkQkREysIY8wNgvLX2yhJe+zhwg7V2eZljuBD4pbV2ajmPKyIiUirdwRIRkf2SX+fqGJNzMvBx4KFS9rXWvr0cxZUxxm+MucgY4zLGTAS+XWoMIiIig0EFloiI7K8qcvOwIuTWifo/4C9DHIMB/pNcF8DXgOXAt4Y4BhERkQINERQRERERESkT3cESEREREREpExVYIiIiIiIiZaICS0REREREpExUYImIiIiIiJSJCiwREREREZEyUYElIiIiIiJSJiqwREREREREykQFloiIiIiISJmowBIRERERESkTFVgiIiIiIiJlogJLRERERESkTFRgiYiIiIiIlIkKLBERERERkTJRgSUiIiIiIlImKrBERERERETKRAWWiIiIiIhImajAEhERERERKRMVWCIiIiIiImWiAktERERERKRMVGCJiIiIiIiUiQosERERERGRMlGBJSIiIiIiUiYqsERERERERMpEBZaIiIiIiEiZqMASEREREREpExVYIiIiIiIiZaICS0REREREpExUYImIiIiIiJSJCizpkzEmY4x53RizxBhznzEm0M9rv2OMuXEIY3ufMWapMSZrjJk7VOcVkcExwvPN/xpjVhhj3jTGPGSMqR2qc4vI4BjhOee/8/nmdWPM48aYCUN1bikfFViyNzFr7XHW2qOBJPCp4Q6ohyXAe4B5wx2IiJTFSM43TwBHW2uPAVYBXx3meETkwI3knPO/1tpjrLXHAX8HvjXcAcnAqcCSUjwHzAIwxlyR/8vKG8aYu/Z8oTHmGmPMK/ntD+z+q1D+rtOS/PPz8s8dZYx5Of9XmjeNMbNLCcZau9xau7KM1yciI8dIyzePW2vT+YcvAZPKcpUiMlKMtJwT7PGwArAHfIUy5FzDHYCMbMYYF3Ah8Kgx5ijgG8Dp1tpWY0x9H7s8aK39dX7f7wIfB24m9xeYC6y1W3sMsfkU8FNr7d3GGA/gzO/3HFDVx7FvtNY+Wc7rE5GRYxTkm38D7j2wqxSRkWKk5hxjzP8AVwBdwDllulwZQiqwZG/8xpjX898/B/wW+CRwn7W2FcBa297Hfkfnk04tUAk8ln/+eeAOY8yfgQfzz70IfN0YM4lc0lqdP+6Zg3FBIjJijfh8Y4z5OpAG7h7oxYnIiDOic4619uv5fb8KXAd8ez+uUYaRCizZm1h+/G+BMaaU/e4A3m2tfcMYcxVwNoC19lPGmFOAi4FFxpgTrbX3GGMW5J97xBjzSWvt07qDJXLQGdH5Jn/sS4DzrLUariMy+o3onNPD3cAjqMAadVRgyUA8DTxkjPmxtbbNGFPfx194qoDtxhg38BFgK4AxZqa1dgGwwBhzITDZGFMDrLPW/swYMwU4Bnhad7BEhBGSb4wx7wC+DLzVWhst7yWKyAgyUnLO7N13u4BLgRXlu0QZKiqwpGTW2qX5ccHPGmMywGvAVXu87JvAAqAl/9/df6X53/wETwM8BbwBfAX4mDEmBewAvldKHMaYy8iNeW4C/mGMed1ae8GBXJuIjCwjJd8AtwBe4In8X7hfstaOpI5jIlIGIyjn/D9jzGFAFtjIyOpwKCUyGu0gIiIiIiJSHmrTLiIiIiIiUiYqsERERERERMpEBZaIiIiIiEiZqMASEREREREpkxHVRbCxsdFOmzZtuMMQkQOwaNGiVmtt03DHsS/KNyJjw2jIOco3ImNDqflmRBVY06ZNY+HChcMdhogcAGPMxuGOoRTKNyJjw2jIOco3ImNDqflGQwRFRERERETKRAWWiIiIiIhImajAEhERERERKRMVWCIiIiIiImWiAktERERERKRMVGCJiIiIiIiUiQosERERERGRMlGBJSIiIiIiUiYqsERERERERMrENdwByOjUEU2yoTXC82vaOGV6PTOb
K6mv8Ax3WCIiIiIiw0oFlgxYNJnmzhc28JMnVxeeu+bM6dxw3mwqfe5hjExEREREZHhpiKAMWCie5tZn1hY9d/vzGwgnMsMUkYiIiIjIyDCoBZYxptYYc78xZoUxZrkx5rTBPJ8MDWshlckWPZfOWrLWDlNEIso3IjK0lHNEZG8G+w7WT4FHrbWHA8cCywf5fDIEAl4n7zp2QtFz5x3RTMDjHKaIRADlGxEZWso5ItKnQZuDZYypAc4CrgKw1iaB5GCdT4ZOtc/NNy85khOm1vGvFS28ZXYD7z5uIrUBNbmQ4aF8IyJDSTlHRPozmE0upgMtwO3GmGOBRcAN1tpIzxcZY64FrgWYMmXKIIYj5dRQ6eWjp0zlvSdMwud24nSY4Q5JDm7KNyIylPaZc5RvRA5egzlE0AWcAPzCWns8EAH+Y88XWWtvs9bOtdbObWpqGsRwpNwcDkOF16XiSkYC5RsRGUr7zDnKNyIHr8EssLYAW6y1C/KP7yeXjEREyk35RkSGknKOiOzVoBVY1todwGZjzGH5p84Dlg3W+UTk4KV8IyJDSTlHRPoz2AsNXw/cbYzxAOuAqwf5fCJy8FK+EZGhpJwjIn0a1ALLWvs6MHcwzyEiAso3IjK0lHNEZG8Gex0sERERERGRg4YKLBERERERkTJRgSUiIiIiIlImKrBERERERETKRAWWiIiIiIhImajAEhERERERKRMVWCIiIiIiImWiAktERERERKRMVGCJiIiIiIiUiQosERERERGRMlGBJSIiIiIiUiYqsERERERERMpEBZaIiIiIiEiZqMASEREREREpExVYIiIiIiIiZaICS0REREREpExUYImIiIiIiJSJCiwREREREZEyUYElIiIiIiJSJq7hDkByUpksDgNOR3HNm81a2iJJkpksHqeDxkoPxphhilJERERERPqjAmsIZDJZWiNJ4qkMVT439RWewrZUJsPm9hi/eGYtjVVerj59Gs3Vvtx+WcuKHUE+edcitnTEmNoQ4LaPzeXQcZUqsvrRGk4QSaTxe5w0V/mGO5yCQlxuZ+FnLCIiIiJjiwqsIbCtK87FNz9HMJbmQydP4T/ecRg1gVyR1RZOcfHP5hNLZQB4YU0rv7vqJBoqvbRFEnz8joXsCMYB2NgW5Zo7F/LAp0+nqco7bNczkrWGE3zyrkUs2tjBlPoAD3z6NJpGQJHVFk5wwx9f4/m1bUyo8fHwZ89QkSUiIiIyBmkO1hB4ZUM7wVgagAdf3UIiky1s29YVKxRXAG9s6SKZ355IZQvF1W6b2qOF7dJbLJlh0cYOIPdetYaTwxxRTiKd5fm1bUCu4N7SGRvmiERERERkMKjAGgInTq2jwuME4KI5h+Bxdr/tE2r8eF3dj4+aUF3Y7nU7et2pmlTnL9pfivndTo6eWA3AhBofDZWefewxNDwuB3On1QHQVOVlYq1/mCMSERERkcGgIYJDYEKtn6dvPJtoMkON30VtoPtDf32Fm79d9xZ+9vRqGiq8fPrsmTRU5oqqhoCHX18xl2vuXEhLKMH4ah+/+tiJNFSMjKJhJGqs8nLHVScTSqSo8LhGzBysxkovv/zIiYW4NMRTREREZGxSgTUE3E4H4/Yy38bjcnLo+Cp+9L5jcRqDu8fdLKfTwZwJ1fzj+reQTGfxuh3UV3hxONTgoj+NVV4aR2ABM1LjEhEREZHyUYE1Qvjczj6fdzodaoYgIiIiIjJKaDKPiIiIiIhImajAEhERERERKRMVWCIiIiIiImWiOVhlYq0lkszgczlw9dFGPZXJkkxnqfAO7VveFk4QTqTxupzUBdx49zLXazAk0xnSWUvAU3zNXbEU61sj3PvKJmY3V/HOYw/ptRhwOpMlns5S4XFijJp6iIiIiMjooAKrDJLpDEu2BvnZ06s5Y1Yjl584iboerdjbI0l+/8J63twS5CvvOIzZ46pwDkEnwNZQgitvf5ml24L43A7u/LeTOXl6w6CfF3KF3U1PrWZX
MM43Lj6SyfUBIFeIvri2lU/94dXCa+9esJE/XXtaoXV5ZzTJ/Yu2MH9NK9efO4s5E2vwuIauMBQRERER2V8qsMqgM5riw795iXgqyzMrWzhpWn1RgfXqpg5++tQaAJZs7eIfn3vLkHQGfHFdG0u3BQGIp7J8+69L+cPHTymsszWY/rxwM3e9uBGAllCC31w5l/oKL+2RJDc9ubrotWtbImztjBUKrE3tUb77j+UAvLSujWf//RzGVavAEhEREZGRT3OwysAChu47UnvenDJ7PBiqIW97nsYwdEPtiq+x+3tL77iKXzGwbSIiIiIiI4nuYJVBXcDNPdecws//tYYzZjUyJT8cbrfjp9TxxfMPZfHWLm58+2HUV3j2cqTyOnV6A3Mm1rB4axcBj5P/uvSoIbl7BfC+EyexsyvOjmCcr198BPUVufM2VHj4wtsO5dq7FhVeO7Opkgm1/sLjKfUBvv3OI5m/ppXrzplFbcA9JDGLiIiIiBwoY60d7hgK5s6daxcuXDjcYey3aDKN1+XA6eh9YzCdzZJMZQkMQ5OLSCKD1+2g1j+0TS5SmSzprMW/xzmD8RQbW6P8eWGuycWFc8b3anKRyWZJpLO9GmTIyGeMWWStnTvccezLaM83IpIzGnKO8o3I2FBqvtGn1zLqrxhwORy4vEM/IrOh0ktD5ZCfFgC300Ff9Vy1z82cSTXMmTRnr/s6HQ4CHo1gFREREZHRRZ9gRUREREREykQFloiIiIiISJmowBIRERERESkTFVgiIiIiIiJlogJLRERERESkTFRgiYiIiIiIlIkKLBERERERkTJRgSUiIiIiIlImKrBERERERETKRAWWiIiIiIhImajAEhERERERKRMVWCIiIiIiImWiAktERERERKRMVGCJiIiIiIiUiWu4AxiollCcUDxNhddFrd+N1+0c7pAGXTKdJZpMU+F14nb2vt5IIk3WWqp87gEfO57KkEhlqPS5cTpMr+3BWAqnw1DhHdivSjCWIpxIk8pkqfS5aKjwDmj/RCpDPJWh0uvC6RwZfwdIpjN0RFNEEmmqfC6aqnzDHZKIiIiIjDCjqsDa0RXnslufZ3tXHJ/bwZ8/eRrHTKod7rAGVTCW4pHF23ng1S18+JQpvO2IcUWFVEsowX/+bSnBWIr/uWwOk+sDJR+7PZLk1/PWsWhTB1++4DCOmVSDx9VdwG1pj/K1hxZT5XPxnXcdVXJBEUtmuG/RFr77j2VYC2fObuSmDxxHQ2VpRVZHJMnvX9zA82ta+fzbDuXEqXX4RkAhvWpnmPf98kViqQzjq308/NnTGV/jH+6wRERERGQEGRm3Bkr04Ktb2N4VByCeyvL9f66gK5oa5qgGV1csxX88uJhXNnTwhXvfIBjrvl5rLbfNW8vf39zOvNWt/Pv9b9AZTZZ87OXbg/zi2bW8vL6dK373Mp093svOaJJ/f+BN5q1u5R+Ld/DLZ9dhrS3puMF4ih8+uoLdL39udSu7QomS49rQFuGmJ1fzyoYOrr79Fbpiw/8zDsZSfP+fy4mlMgDsCMa5f9GWYY5KREREREaaUVVg7XkXw+d24BhVVzBwDodh98g9p8NgTPcwPmMMAU/3TUify4nD9B7mtzdel6P4+x67OhwGX4+7WRUeZ9G598Wzx7A+Vx/DD/e6b4+4en4/nIwBr2vP37/hv6smIiIiIiPLoA4RNMZsAEJABkhba+ceyPHeeewhPPDqFpZuC9JU5eUbFx25X/OORpNav5vfXXUSD766lQ+cNJlaf/H1XnHaVJKZLB2RJJ8//1Cq/aW/H7OaK/neZUfz0rp2rjtnVtE8qWqfm//33jn85IlV1PrdXHn6tNJjDrj5/nvm8KX73iCRzvLBkybTWOLwQICJtX5+9L5jmLeqlU+9dQYNFZ6S9x0sVT43X7/4CBZv7aIllOCoCdVcetyE4Q5Leih3vhER6Y9yjojsjSl12Nd+HTyXfOZaa1tLef3cuXPtwoUL+31NWzhB
LJXB43TQUOntszHDWJTKZHHvpdlDNmvJYnHt5+28/o6dzmZxYHAM8H2OpdIEY2myWUvA66TGP/Aiqb+4hkMma2kLJ0hmsvjdzpLnlB1sjDGLhuODxmDkGxEZ+UZDzlG+ERkbSs03o6rJBXDQfqjtr9BwOAwO9r/Q7O/Y+1u0+d0u/O4D+/UaScUV5IZoNlerc6CIiIiI7N1gf4K1wOPGmEXGmGv7eoEx5lpjzEJjzMKWlpZBDkdExjDlGxEZSv3mHOUbkYPXYBdYb7HWngBcCHzWGHPWni+w1t5mrZ1rrZ3b1NQ0yOGIyBimfCMiQ6nfnKN8I3LwGtQCy1q7Nf/fXcBDwMmDeT4ROXgp34jIUFLOEZG9GbQCyxhTYYyp2v098HZgyWCdT4ZeJpOlM5oknckOdyhykFO+EZGhpJwjIv0ZzCYX44CH8msnuYB7rLWPDuL5xiRrLS2hBC3hBM1VPpqqhq7JR2c0SUsogdftoC7gKWqJ3x5J8tBrW/jn4h2cd0QzHzhpCvU92qmHEyk6oymiyQxNlV7q9mi13hZOsCuUoDbgpj7gwVvGNaVaQgl2heI0VXpprPQOuAOijErKNyIylJRzRGSvBq3AstauA44drOMfLFpCCS79+fNs74ozud7PA58+neaqwe9kF0+luXvBJv73sZUA3H7VSZxzeHNh+1PLd/Lff18OwMKNHXhcTq46fVqhbf7SbUE+dNtLZC188q0z+Ny5s6nw5n7dOqJJvvbQYh5buhOvy8EjN5zJzKbKssTdEorzwdsWsLYlTGOlh0c+d6Y6/x0ElG9EZCgp54hIf0ZWH2zpJZJIs70rDsDm9hixZGZIzhtNZnls6Y7C438u2U42m1szLZnO8uyq4o5Iz61uIZbqju3JZTvJv5wnl+0i2iPuVCbLMytz+yfSWV7f1Fm2uOOpLGtbwgC0hpO0RZJlO7aIiIiIyL6owBrhqnxujplUA8BJ0+oKd4EGW6XXyTVnzsAY8LkdfPTUqYWhdh6Xg8tPnFT0+stPmERlj9guP3EyFZ7csL9PnDmdKl/3Np/byVVnTAOgsdLDqTPqyxZ3wOPkrENz3ZoOG1dF40G6bpqIiIiIDA9jrR3uGAq00nnfWsMJ4qkMfrdzSBdajiTSBGMpjDHUV7jxuLrnSXVFUyzb3sVfXt/GhXPGc+ykWmoD3fOsUpks7ZEkmaylyucqmr8Fufld4UQaj8tBU6WX/Dj2smgLJ4ilMvhcThqHcM6a5JS6yvlwU74RGRtGQ85RvhEZG0rNN0NzO0QOyHDdhanwuvZ6x6wm4Oa0mY2cNrOxz+1up4Nx/cx9qg14igqychrKIlREREREpCcNERQRERERESkTFVgiIiIiIiJlogJLRERERESkTFRgDZFUOks4kR7uMEREREREZBCpyUUPsWSGaDJNpc+Ft0fHvFKkMhmCsTR+t5PAHo0hdnTF+PW8daxrjfKhkydz8vT6ATV4iKfSRBIZKr0uvO6BxdWfSCJNVyxFZzRJU5WPpj467nVEkjgdhmp/cRfAdCZLWyRJSyhBU5WXukBxl0GAYDxFOmOpC7jL2iWwP5mspS2cYFcoQWNlLq5yvmfRZJpYMkOVz9XrekVEREREVGDldUaT3PXSRh5dsoOrTp/GhUePp3KP1uJ7E0uleWFNGz9+YhWnzmjgs+fMor4iV0C1hOJcdusLhcWC/7VyF9+77Gg+cNIUnI59Fx1d0SR/XrSFh1/bygdPmsylx03sVezsj2zW8tK6Nq65cyFZCxNr/Tz0mdNp7tH5b0NrhC/d9wZVPhc/eO8xRV0Bt3TEuOTm+YQTuaLyL9edwaHjqgrbd4XifP2hxbSGkvzw8mOY1Vw5JEXW9q4YF/9sPl2xFF6Xgwc+fTpHT6wpy7E7Ikl+/dw6nl3VwqfPnsk5hzUP2bpkIiIiIjI6aIhgXls4yf89voql24L8+/1vEoyXPpwvGEvzybsWsXRbkN/OX8/alnBh2+b2WKG42u23
89fTEU2WdOzOWIr/+cdylm4L8s2/LCUYT5UcV3/ao0m+/88VZPPLoG3tjPHsqpbu80aTfPmBN1m0sYNnVrbwi2fWks2/OJXJ8qt5awtDHmOpDD99chWxVKaw/90vbeKJZbt4bXMnn7/3ddojpV3vgbrj+Q10xXLvUSKd5UePryRcpvdse1eMW59Zy9JtQT73x9fK9rMQERERkbFDBVae1+1g9w0Wr8uBYwB3WwwU3cmo8nV/7/P0HkYW8Lgo4eYVAB6no3Cny+00uErdcR8cBqr2uPtS2+POmNNhih43VHpw5M/tMFDrLx7iWFfhwdnjPWuo6N5e43eXdLeuHOoqiuOqLeO5A57u98vvdg7od0REREREDg4a35RXG/Dwh4+fwiNvbueDJ0+mvqL0YXgNlV4e+PRp3PHCBs6c1cSEGn9h27gqLydOrWPRxg4gV5x87aLDqa8obTHcGr+bP11zKg+/tpXL504q2+K89RVevv/eOVzx25fZFUpw3hHNHD+1rrC9yufmfy47mmkNFdRWuPng3MmFbU6Hg6vOmMa81S0s3RZkdnMlnz1nFh5Xd71+ybGHkMpm2dYZ49qzZg7aosJ7ev/cyTyxbCevb+5kemMFN15wGH5PeX7NGyo93H7VSTy1YicfPWVqYRioiIiIiMhuxlo73DEUzJ071y5cuHC4wyi71nCCNzZ3sqE1wrlHjKO5yjsi5u7sbgiRsRa/2zngIqgtnCCRzuJxOmjso0HGcGmPJImnMiMuroOFMWaRtXbucMexL2M134gcbEZDzlG+ERkbSs03w/8p/yDQWOnlvCPGDXcYvTgdpqipxUA1VI7M4kV3lkRERERkuGgOloiIiIiISJmowBIRERERESkTFVgiIiIiIiJlogJL+tUZTWq9JxERERGREqnJxQjQHkmyqT1Ktc9FU5WXKl/pLeKDsRS7QnGiyQyT6wK91oHqTyKVoTWcZFtnjOlNFTT2aFoRT2VYuq2L7z+yggqfi29dciTTGyoKa2EB7ArG2dAWYWpDBU2V3qJtoXiKllCCYDzNlPrAgBpPJNMZ2sJJtnTEmNZYQdMAOwHujmtKfQWNlR5czu6/I4QTaVpDCTqiSaY2VKghhoiIiIiUlQqsYRZJpLn5qdXc/sIGAO76+MmcObup5P1fWtfGtXctAuAzZ8/k+nNn4+9jceO+7AjGOf/H80hmspw4tY7bPnZioTNgZzTJh25bQDKTBeCjOxbwl+vOoLkq13VwVzDOpT9/nu1dcRoqPPzzhjOLOhK+saWLj/5mAQBXnTaVG99xOJUltqZvCSU578fPEE9lOfKQau78+MlFxV//+ya4/Jcvsqk9So3fzeNfOItxPeJauSPI5b98EWvh8hMn8a1LjqTaX3pBKyIiIiLSHw0RHGaxZIbn1rQWHj+7sqXkfa21PLV8Z+Hx/DWtxFKZkvdfuSNUKKBe3dRBJtu9JlpXLF3YBrC9K04q0709lsqwvSsOQFskSTiRLjr2syt3dce1to14svS4NrRFiKf4svACAAAgAElEQVRy5162PUg6U/pabfFUhk3t0fw1pGiPJIu2v7C2jd1Lv724to1EuvS4RERERET2RQXWMKvyu7jhvNk4DFT7XXzw5Ckl72uM4eozplPpdeF0GK47ZxZVvtLuXgEcO7mWKfUBAK49cwZeV/e+dRVumnsMzTtxah1+d/evS6XXxdmH5e60nTK9rtddoA+eNIUavxuHgevPnUWVv/SbpYeOq2RmUyUAHz1lCj536b+mFV4n7zg6t+bYsZNqaNrjzte7jp1AfYUHY+C6c2eNiAWfRURERGTsMNaWfndgsB2sK51HEmmC8RROY2io8OB0ll5QpDNZ2iNJshZq/O6Shwfu1hJKkM5m8bud1Aa65yNZa9kZTPCX17dS5XNx/pHje82FaoskSKayeFyOXosOZ7KWtnCCjLVU+9wDLmRawwlS6Sw+j5O6wMDmSbVHkiRSmT7jymYtreEEWWup9Lqo
HMB8NylNqaucD7eDNd+IjDWjIeco34iMDaXmG/35fgSo8Lr2+06Ky+komvs0UHtrIGGMYXyNj0++deZe922o2Pu8KKfDHFBcpc656kt/jSscBxiXiIiIiEh/NERQRERERESkTFRgiYiIiIiIlIkKLBERERERkTJRgSX9CsVTRPZowV6qaDJNMJYqc0QHLppI09VPXF2xFLEBtJUXEREREdltzDW56IomSWWy1AQ8uAfQje9AxVMZQvE0PreDqj460wVjKRLpLDV+Fx5Xcae/1lCCrZ0xAh4njVXeoq55iXSG9nCSTe1RpjQEaKjw9Nq/I5IkYy0NFR6MMYXnrbW0hBJsbI8yrspLXYWnKLZQPEVHNMXOYJyp9QGaqryF/YOxFEu2dfGLZ9YS8Dj54vmHMrWhAp87d+5kOkN7JMnGtihT6gPUV3oKbd6T6Qyb2mP85MlVdEaTXHPmDI6fUkuNv/RugKF4ingqS5XPVTjnbm3hBDuCcRzGMK7aS/0ezTaiyTSRRAa/x1m0uHE6k2VzR4yfPLGKlnCCq0+fxsnT6wvdEzujSeavaeXulzZxSI2PG942m0l1AZwOQynaIgl2BhNgLeOqfUUdDNOZLG2RJBvbIkyo9VMX8KhFvIiIiMgYNKY+4bWGE3zl/jfZ0Bbh+++Zw/FTanE7B9a2fH9EE2meWrGL/3t8JSdPr+c/Ljy86EN/WzjBNx5ewoodIf7z0qM4ZXp9oRhpDSe48vaXWbotCMAnz5rB9efNotKbK4TWtUS49JbnSWay+NwO/n79mcxqriwce0dXnOv/+CqheJqbP3Q8s5orC0XSrlCCS26eT0soAcDNHzqei+YcgtNhyGQt81a18Nl7XgNy3QT/fv1bGJfvsLd6V4gP/3pB4Tz/WtHC0196K5Py62Ztbo9x8c3PEU9l8TgdPHzdGRx5SHX+epNckt8G8PyaNn7/byfz1kObSno/2yNJfvDochasa+eL5x/GeUc0F4qR9kiCz9/7Os+tzi3O/O7jJvCddx1VKJKCsRQPvbaF3z2/gQuOHM+nz55JXb6rYGs4yTtvnl9YFPnFtW384iMncOGcQ8hmLU8u38mN971ZiOPJFTt54gtvLbwn/emIJPnag4t5bGlu4ee3HzmOH7z3mMK5dwYTXPizeQRjaRwG7v7EKZw2s7Gk90NERERERo8xNUTwyWU7eWrFLta2RPjsPa/RERma4WmhRJob/vQaG9qi/HnhFlbvDBdtf3FdG/9csoP1rRE+84dX6Yp2x7UrGC8UVwC/nb+eSCI3PC2WynDL06tJZnKFSjyV5ZfPriWVf2yt5dZn1vDKhg5W7AjxlQcW09nj2M+uaikUVwA/fmIV7ZEkkCsIfvzEqsK2llCCZ1buyp8nw2+eW190DclMln8u3QFAKpPlV/PWFgqoZCbLzU+tLgyre25Na2Fb4bqeW0coXtrPY82uMPe+soUNbVE+f+9rhHoMUQwnMoXiCuDh17cR7TGcLxRP8+2/LmNjW5TbnltHa7j7+l/d1FEorgpxzV9PZzRJRzTJHS9sKNoWjKVZvj1IKSLJdKG4Anh82U4iye5z3bdoM8FY7nHW5n4WndFkSccWERERkdFjTBVYPe80NFd5cZQ4tOtAGUPRsL4912Fq7rHWVHOPYXgAfk/xTcQqX/djl+m9ZtO4ai+7L8sYw6Q6f2Hb+Bofbmf3sZv2WEuqvsKDK7/d5TS9FvDdvfaU05g+18dqqvQUtu95V2dctQ9XPrC+1qFqqPTicpT261ZX0T2MsTbgoccl4XYaerx9eF0OHD2ecDkMFfnFlp0OUzQMry7Qe+hmfYUHl8Pgdjqo8ffe3tdzfXE5HIXr3x1Hz+vd8/1qrPQWfhYiIiIiMnaMqQLr+Cm1/PzDJ3DDebP57ZVzD2ix2oForPDywGdO57pzZnL3J05hXE3xh+lDx1Xx6ytO5PpzZ3HXJ04pKl7qAm6+eP5svC4HDRUebvnwCdTnCwy3y8Gn3jqT
4ybXAnDStDquPH0azh4f3C8/cTLff8/RfPmCw/ivdx1FZY85VsdMquGy4ybiMDClPsAP3junUFTVBjz84L3HMKU+gMPkhtrtPo/b5eDas2ZQ7e8uTqY2BDhjVm6In8NhuOK0qZw0rQ6A4ybX8umzZ+J25eI6dlIts3sMY6zwOLnhvNn4PaUN1xxf7eOeT5zCZ8+ZyYOfPr1oQeNKr4vvXno0AY+Tap+LH7//2KI46ys9PPTZM/jsOTO571OnUdujQJrdXMVRE6oLj31uBzdecBiVPjfVfjdfu+gIPD3m7Z0wtZbJ+SGR+1Llc/HDy4+h0uui0uvi/713TlGxfP6R4zj38Nz7d+i4Sr5+0RGFYaAiIiIiMnYYa+1wx1Awd+5cu3DhwuEOY8hFE+nC0LW6it7NOdojCVIZi9tpejV02JdgLEUslcFhcndN9myC0RpOkLXgdzup7lGM7G7KsGBdOxVeJ8dMqu11V2t3XC6HKWroALnGHUu2ddEZTXHqjAYaKsvXdCSeSheG29UGPHhcpR+3NZxg+fYgLaEEp81sKGoakkhnaAsneXFtG4fU+Dh0fNWAivREOlMY/lnjd+PdozlHZzRJIp3F6TBDVvwPB2PMImvt3OGOY18O1nwjMtaMhpyjfCMyNpSab8ZUk4vRKuB1Eeino9xAi6qeqv3uosKpJ2MMTVV9N3BwOR2Mq/bxruMm7FdcjVVezj6seWDBlsjnduFz79+vbmOllzNn991sw+tyMqHWz3tPnLRfx/a6nDRX7/0uXW2g9C6KIiIiIjI6jakhgiIiIiIiIsNJBZaIiIiIiEiZqMAaJbLZkTNXTkRERERE+qY5WCNcazjBv1bs4oW1bZx1aCNnzm4a0w0SRGTwxZJp4qkstQF3UeOb3TqjSVwOU9SVVEREREqjAmsEaw0nuPJ3LxcWIn7ota0cP6WW31wxt1fXvr1pCSVYuq2LaQ0VjKv29lp3a39ls5ZdoTjLtgc5fHw1zVVeXD26BHbFUmztiNERTXLEIVUH1KhD+pfKZNkVTLByZ5AjD6lhfE3fjUvk4NESirNka5CZTRU0V/vw9ehouSsY5wePrWTtrjDXnjWDt8xqLDTCiacyLNnWxQ8fXUmN38W3Ljmq5KUKSosrl4+m5vNRoEz5aDAlUhl2hhKs3RXmqAnVvdYmFBER2dPI/7/bQWz1zlChuNrttU2dbGyPllRgtYUTXH3HyyzZGsTpMDzxhbOY0VS5z/1K0RJOcMnN82kNJ6nxu3n8C2cVLaa7aGM7/3ZHriXtB06axDcvPlJ/DR8k7ZEkF9w0j3AizfhqH3+97gx9CDyItYYTfOQ3C1i1M4zbaXjqS2czJV8kheMp/uvvy/j7m9sB+Mzdr/LkF99aKLA6Yyk+fNsCkpksABvbotxzzalluWveFk7w8d+/wptbunA6DI9//ixmNpcnHw2mlnCC83/8LIl0lhmNFfz5U6dpFIGIiPRLc7BGsHWtkT6f39jW9/N7ymRtoUDLZC2rd4XLFls8laE1nARyd6si+XW8dntpbXvh+0UbO4mnsmU7txTrjCYL66jtCMZJpPVeH8wyWcuqnbl/66mMZUOPPBJPZVi2xx9tNvTIJ5FEulBcAaxtiZAt01qJmaxlydauHjGGynLcwbalI1b4N7WuNUI6o/mwIiLSPxVYI9ipMxr6fP7EKXUl7e9zO/nS+YcCMKu5kuMn15Yttiqfi4vnHALAOYc1UbPHWlsfPnUKjZUeXA7Dly84jGq/bpYOlsZKL6fNqAfgPSdMpKKfNdVk7PO7nVx3ziwAjjikiiMOqS5sqw14+NhpUwuPa/xu5kys6X7sczO9saLw+L0nTCSwn2vO9YrL4+TGtx8GwMymSk6cWloeG26zmis5emLuPbzmzOn4PXtf605ERATA2DL9dbIctNJ5sa5YinsWbOJHj68kk7W4HIZvXnIk7zlhIlUlDrcLxlPEkxkcDlP2YS3tkSSpTBa300F9RfEiutZa
WsNJrLVU+V34y/QhTfrWHkmQylg8Lgd1w7ygcamrnA+3sZxvgrEUsVQGl8P0Gk7cGU2yoS3K+tYwp81opKnKi9PR3eiiJZTglQ3t1PrdHDa+quT5ngOJyzkI+WgwtYUTpLMWn9tBjV8Lho80oyHnjOV8I3IwKTXf6FPvCFbjd/OxU6fy7uMn0B5J0lDhocrnHtAdimqfm+pBmvu0Z1HVkzGGpqrR8wFqtFMTEemp2u8uzKvaU23Aw3EBD8ft5Y52U5WXi/J3p4cyrpGsnEWmiIiMfSqwRrhKn4tKn4tDavzDHYqIiIiIiOyD5mCJiIiIiIiUiQosERERERGRMlGBJSIiIiIiUiaag1UmsWSGUDyFx+Wgto8ubp3RJMlMlmqfG597YG1+Q/Fc562A29nnYr2t4QTWWuoriruB7UtXLEVrKMHmjihHHFJNY2Xx/pmspT2SwJj96/jVFk6QsZb6gAeXc2C1fFc0SSKdpcrn7tUWOZu1tEWSGEOvuJLpDG3hJMu3B5nVXElDpbdXU5D2SK4jWF3Ajdvp7LV/ZzSF2+mgrp8mHoOhPZIknclSG3DjcakVtIiIiMhopDtYZRCOp/jL61u5+Ob5fPXBxbSFE0XbW8MJPvfH17jkZ/OZt6qFWCq9lyP11hFJctOTq7n4p/O59Zm1dEaTRdu3dsa48ncv855fvMCKHUGy2dLa7mcyWZ5YtoPzfvwsV93+ChfcNI+WUHfc2axl5Y4g7/nFC1zx25fZ2hErOWaAHV1xrr7jFS77+Qss2dpFJlP64rdt4QRffWgxF/9sPg+/vpVwIlW0fW1LmPf98gU+/OuX2NQeLdq2tSPO2T96hn/7/ULO/tEzLN3WVbR9ZzDOJ36/iEtveZ5XN3aSymQK2xKpDC+tb+edt8znU39YREsoPqBrPhC7QnE+c/ci3nnLfF5a104ildn3TiIiIiIy4qjAKoNIMsNXH1pMSyjBP5fsYNXOcNH259e0Mm91K7tCCT5/7+uEYqUXWF2xFL+dv56WcIJbn1lLsMe+1lp+8cwalm4Lsrk9xjceWkJXLNXP0bq1R1P8/F9rC487oymeXrGz6LzfeHgJm9tjLNse5NZ/rSm5eAO488UNvLmli62dMb7ywGI6SowLYPWuMI8s3kFLOMHXHlpMON59zcFYiu/8bSkb2qKs2hnmR4+tJJUv3qy13PHCehLp3OOshZufXkMo3n3uh17dyqubOtjeFefG+9+gI9q9rSuW4kv3vsHOYIIF69t5bGn3+zHYnli6k5fWtbMzmOCLf36drnjp75eIiIiIjBwqsMrAYQzN+TWfjIFx1cXD1ibXBwrfT6rz4xjAMD6f24nXlfsxBTxOvO7uH5kxhllNlYXH0xoDuF2lHdvtNL3inFTX3Qre43IwraGi8HjWuMoBxT2zR1xTGwJ4BjBEsLnKi8mfqqnSi9PRva/H5WB6Y4+4mitx5l9sjGFqj5gBJtb6cfc49/Sm7u1T6gNF21wOR9F7MK2h++c22Hq+15PqArgG8F6LiIiIyMhhrC39rsRgG80rnW/piPLokh2cOLWO2c1VVPq65/10xVIs29bFsu0hLp4znvEDWNMqkcqwoS3KvFUtnHN4M1Mb/EXzhjoiSZ5f20pXNMUFR48f0FypDW0RPnXXIta2hHn/3Ml86fzDqK/snnfUFk7w6JIdVPvdnDGrsd+FhffUEUmyYH0bu0IJLjr6EBoHsOhwOJFm9c4QizZ2cMFR45lU58eY7oKjLZzgiWU78bodvPXQ5qK4WsMJvv7QYp5YtpMTp9Zx84dOYHyNr7C9M5pk0cYONndEuXjOhF6LIe8KxvnH4u3MaKrk2Ek1fc6nGwyd0SRvbOli7a4wlxxzCM3Vvn3vNEKVusr5cBvN+UZEuo2GnKN8IzI2lJpvBr3AMsY4gYXAVmvtJf29Vglo6LWFE2StJeBx9WoGMVoFYyni6Qxux9A3qpDh/bCjfCNy8BkNOUf5RmRsKDXfDMUn6huA
5UD1EJxLBqhhP7oDjnTVfjfV9O62KAcF5RsRGUrKOSLSy6DOwTLGTAIuBn4zmOcREVG+EZGhpJwjInsz2E0ubgK+DOy1R7cx5lpjzEJjzMKWlpZBDkdExjDlGxEZSv3mHOUbkYPXoBVYxphLgF3W2kX9vc5ae5u1dq61dm5TU9NghSODwFpLPJUZUPv2nuKpDOls6etjieyN8o2IDKVSco7yjcjBazDnYJ0BvMsYcxHgA6qNMX+w1n50EM85JoXjKaLJDBXegTeiSKQydMVSeFyOPjvidUVTJNIZavxuvG5n0baWUIJtnTE8LgfNVd6i+VqtoQSPLNnO82taOWV6A+889hCaqkrrfNceSfLqxg4eeHULU+oDXHH6NMZVeXGV2Mq9JRRnR1ccp8PQXO0bUOdEGbOUb0RkKCnniMhelfRp3RgzDvgeMMFae6Ex5kjgNGvtb/e2j7X2q8BX8/ufDdyoxNO3tnCCVza001DpZXZzZVEh1BpK8N//WMbza1o5+9BmvnrR4SU3pmiPJLnrpQ3c/dImZjZX8qP3HcvE2u4W8ds6Y9x43xus2RXmw6dM4YrTphVanu/oinP5L19gS0cMgBOm1nLbx+bSWOmlNZzgyttfZum2IACPLd3J3Qs2ce+1p+6zHXsonuKmJ1Zx50sbC8/ds2ATj37+LCbW7bt9/a5gnA/c9hLrWyMAHDWhmt9ffXLJbeC7YinWtYTZ0RXn5On1vd7L1nCCBevamFIfYGpDBdX+kd8sI5nOsK0rzhubO5k7rZ4JNb6itvaj0UBzjvKNiOwvfcYRkXIrdYjgHcBjwIT841XA5wcjoINNKJ7iu/9Yzqf+8Crv++WLvLC2rbAtncly50sb+Mvr22gNJ7n/1S3ct2hLyUPyVu8M8ZMnVrMrlODFtW189cE36YqlgFwr868++CYvrM2tVXXTk6tZuSNUOO/vX9xQKK4AXt3YyWsbO4Dc+lm7i6vd1raEWba9+Lm+hONp/rBgY9FzoUSa219YTzrT/3BBay33L9pSKK4Alm4LMm916WPb39zSyWW3vsCn736V/3hwMZ3RZGFbRyTJ5//0Gp+95zXeecvzrNwZKvm4w6k9kuIdN83jhj+9zqW3zKcllBjukMrhDpRzRGRo3IHyjYiUUakFVqO19s/kJ3Jaa9NAptSTWGuf2deaNAerRDrL0m1dhcevbeoofJ/MZFm+vfhD/rLtQdIlFlhrdoWLHq9riZBMZwrnXdejUOn5+lQmy9o99gVY3ZJ7bntnvM/zbWqP7jOmeDpLX+FvaI2QTPdfYKWzts+iZ9UACqE3t3S/18u3B0n2KOpS2eL3e/m2fReMI0FXLEU8lbuO1nCSxD7ex1Fiv3OO8o2IDJA+44hIWZVaYEWMMQ2ABTDGnAp09b+LlKLG7+JblxxJwONkUp2fK06bVtgW8Li45swZ7B7t5TDw8TOm43GV9mM75/BmKjzd86quOn1aYfhhbcDN1adP73EuJ+ce0QyA3+PiAydNLjqWMXDBkeMBOH5KLX2NQDt9ZsM+Y6rwOmnqY4jjZcdPIrCP+WVup4MPnTSl1/OXHjdxn+ftPs9EpjdW4HM7+PY7j6KmxxDAKp+L77zrKLwuB7ObK3n7UeNLPu5waqz08PYjx+FyGK48bSqVvjGxYLRyjogMFeUbESkrY+2+74YYY04AbgaOBpYATcDl1to3yxnMwbrSeSKda0RhMDTtMZcoFE+xM5ibo3XK9HrGVftKbnSRymRpCSWYv6aV2c2VTGusoK7H/K7OaJINbRFW7QzzllmNNFV5cecbTXRGkzyyeAe3zVtLwOPiKxcezolT6qj0uQjH0zy2dAdff3gx8VQWr8vB1y8+gncfN3Gfc5YymSzLtoe4+o6XaQ0nMQYuP3ESX73wcOor9j2PqjOa5Knlu7j1mTW4HA5uvOBQTp3RQJWv9LlSraEEWWup8rvwu4vfy2giTTiZxmHMqGqe0RFNkspk8bqcRUXjcCh1lfN9
HGPQc87Bmm9ExpoDzTnKNyJSqlLzzT4LLGOMAzgVeBk4DDDASmttqhyB9qQENLJkMlnao0kcxvRqBhFLpQnG0kQSaSo8Lqr8LgKe0gq/TCZLWzRJOJ7G73YS8LoGVBRkspb2SK44a6jwjPqGDmNNGT7sDEnOUb4RGRsOJOco34jIQJSab/b5idhamzXG/NxaezywtCzRyajgdDr22nrd7+5992cgx22u8tFctZ9xOXrf6ZOxQzlHRIaK8o2IDIZS52A9ZYx5r9GtAhEZGso5IjJUlG9EpKxKLbA+CdwHJIwxQWNMyBgzOlqsichopJwjIkNF+UZEyqqkMV7W2v0czCUiMnDKOSIyVJRvRKTcSp5EY4ypA2YDhUk51tp5gxHUaBSOp4mm0rgcDuorPPveYQzoiqVIpDJ4XQ5qAqVfcyZraQ3nOiNOqQ8wuT5Q1N1wX1pDCRZv7cLtNBx+SPWo6vYnpRsJOactnGBzRwxrLVPqA72avYTjKaLJDG6ng7ox8u8+mc7QGU3lOmn2MdexK5Ykkcp3rAwMb8fKnnbnI4/LUViOYqzrjCZJprP4Pc4BdVKV3kZCvoFczkllLF63Y0D/X4R956PWcIJs1lLhdZXcjbjnvsl0rmvwnnnwQLWEcnH5Pc5enYjbIwnaIym8Lge1AXev3/NCPnL37qBrbe6zhgVq/G68LidDJRhLEe8nH+3+WdT43XjdQxeXDJ2S/oUZYz4B3ABMAl4n13HnReDcwQtt9AjFU9z7ymZ+8sQqTphSy00fPL7sCWik6YgkuenJVdy3aAsXzTmEr11UWpt1yCWWC26aR2c016Tpy+84jI+/ZXpJya81nOCqO15mydbc6I2zDm3ipg8cd9AUtQeLkZBzOqNJvvHwEv65ZAcAbzuimf+9/NjCB5dgLMWdL27g1mfWctqMBn54+TGj/t99OpPljc1dXHvXQuoCHu78+MlMqgsUtrdHEvzg0RX87Y3tvOf4iXzx7YeNiH97HZEkP3t6Nfe+spl3HD2eb1x8RMn5aLRqCyf42kOLeW51K9ecOYOrz5h20BSW5TYS8g3kCo2r8/9/e+exE/jPdx1Z8u9xMJbi7gUbufnpNZwyvZ4fve/Yony0oyvGR36zgI1tUf7r0qO59LgJJRdZLaEEV/xuAcu3hzhlej0//8gJZfvD5vbOGO+/7UU2t8e4/txZfOLM6dT4c7/H7ZEk3/7rMv72xjYAvnfZ0bz7+ImFjsXtkSQ/fHQFf3l9G+8+fiL/fkFxPlrfGuGDt71EMJ7ito/N5dQZ9XiGoMjqiCS55V9r+OPLm7jgqPF88+IjqO/xfm3rjHHl715mVyjBLz5yAidNr8PtVJE11pQ6B+sG4CRgo7X2HOB4oHPQohplookM3/3HciLJDM+taWPljtBwhzToQvE0v39xI9FkhvsXbSkUS6VYsK696PW/m7+erlhp+4cT6UJxBTBvVQuxZKb0wGW0GPacE0tmCsUVwJPLdxFNdf+uRZJpfvT4KqLJDE+t2MXG9uhQhjcoumIpvvXXJXREU6xrjXD78xuKtgdjae59ZQvRZIY/LNhEKF721Tr2SziR5vbnNxBNZnjw1a10DCAfjVZbO2M8tnQn0WSGnz61mojy4IEY9nwDsGRrV+H/b397YxvBWLrkfSPJND94dCXRZIZ/rWxhfWukaPvf3tjO2pYI6azl239dQiRZ+rGXbO1i+fbc55oF69vZ0RUved99+esb29jcHgPg5qfXEO3xexxLZgrFFcDPnlpDONEddyie4k+vbCaWyvDHl4vzUTqT5ZZ/rWFXKEE8leU7f11a8ueMAxVOpPnt/PVEkxkeem0r7Xvkoz+9sonVu8J0xVJ84+EldB0E+epgVGqBFbfWxgGMMV5r7Qpy60UI4HAYJtX5c98bmFDrH+aIBp/X7aDal/srkt/tHNBwg8n1xe/P5LoALkdpv4o+lwOXo7vRU4XHicupxk9j0LDnHKfDFA05qfK6in73nA7D
uOrcXyVdDkPzGFg6wONycOi47ukocybWFG33e5xU/H/27ju+rfJe/PjnaEu2LG8nseMMJ84iEwJhl9XSUlr2aEt7KRTooNB923s7fm1v170t95aW2UEotMxSShgFWlbIIAlZhOzYie14SbK2dPbvD9myZDuxnNiJ7Tzv1yuvl5Tjc/ToSOer85zzPN+vI32l1eu04RolQ1scNgtF7qx4lGdNvrGsvNCJvTv2VRQ6sVtEHDwKxz3eAJnzCEh/j92O/I8vm0Vioi89ujEdm3JLrNRP6D2up5UXYBlCwsTqrHbZLNKw3rXOjjcVXifWrO+xzSrhzTq3qC5x55wruOy98aiwTzyyWS3Mm9Qbv+oqC4/J3StIx6Oe3w6X3UJhn/OjuROLMo/rqwqxH8Ohi8KxM2ihYQBJkp4BbgTuJH3LvAuwm6b5keFszFguxNcaTvLWbj/zq31MKfXgGQKWta8AACAASURBVOL45rFG1w2aQ0nW7Aty6rRSakpced/i7oorPLymkYfebqS21MP/XbeYqeUFea2bUDTe3uPnB39/H7tV4qdXzGdJbYkYwzyKHG2h4e5tjHjMGSzeqLrOtoNR/vNvWzEM+NFl85hf7cv5kW4JJVm528+iycXUlrpxj4MT+0BMZuVuP2WFDuZV+3LmgSi6zsGuFGsbAiybXkZ1sRubNd/rdCNH1w1awilW7w1w6rQSqovdx+xk6nhJqTpNwQQbm0KcWVfGpGL3CVt0fRiKmx/3eAPpOzLbWyO8tdvPRxdMYlq5Z0jf44OhJCv3+FlYU8zkUndmKB1AOKGytSXE7o4YHz5pIhN8A9e4PFS7NjeHeXlbG5cvrmbWBG/Oto9GKKHwTkOQ9w6GufrkydSU9H6PNd1gV3uUHz2/nSKXjR98bB4Tfb2dPVU3aAklWbsvwGnd8cieFY+64gqr9wUIxhUuPmnCMZuvrRsmLaEkq/f6WTq1lJqS3HgUSihsb43QFklx9swKMY98jMk33uTVweqz4XMBH/CSaZrKEbZvQGO5gyUMTUrViSRVbFZpyHMldN0gmFAAibICBxZx5XZUGY4OVp/tjUjMyTfeBGIywJifXyUI49VwxpzjHW+EXKZpEk1pWCxSvztBgnA85BtvDvttlSTpCtM0/9r9uMQ0zS7TNN8YrkYKJy6X3XrEw4usVgsV3vyvvgljx2iMOaJjJQjj02iMN0IuSZL6ZRYUhLFgsLEd/5n1+J8j2RBBEAREzBEE4dgR8UYQhBExWAdLOsRjQRCEkSBijiAIx4qIN4IgjIjBBrS6JUlaTLoj5up+nAlCpmm+O5KNEwThhCNijiAIx4qIN4IgjIjBOlitwK+6H7dlPQYwEYWGhSOUVHWiSRWrRRJzXIRs4yrmhBIKqm5Q7HHkZLcazwIxGdOEUpGARhj9xlW8EQRh9DhsB6u74J4whrWFk/xzRwfzJvmoqyjA6zr+k0W74gq/X7mP5av2U1vm4e7rFzO9ojCzPC5rNPjjbGoKcf7syn51xYJxmbd2+XHYLJw2vWxYa3J0RlO8ur2DaeUFzJlYlFMHaTDBuMKqvX5ME86cUT6s7TocWdU5EEywZl+Ac+srqC7x5NQSGUtGW8wJxhVW7w1gmOaQP9OmYIKvP7mZllCSL3ygjksWTBrS92mkRFMqu9tjbG+LcOGcqn71co6UYZjsao/yjae2EJM1vnvJHJZNL8spWdEWTvHPHe3Mm+ijrjI3HsVSGvv8MbY2hzl/TmVOOmZBGAmjLd6MpHBSZWdblL0dMS6YU0nlEI57WdVp6kqwem+Ac+orqCl2Yx3CBaPWUJJ/7ehgQY2P6RUFFDiPfxwczToiKf65vYO6ykJmT/CKJB9jVF45LyVJumKA/w4DW03T7BjeJgnDxR+Tuf7BtZmK7i/deTazJxz/A7UhEOc3r+0FYNvBCF95fBN/vHFpJl17Z1TmY79ZiWHCRJ+Lv3/pzEzWwJSqc/c/9/DHVY0AfOviWdx6Tt2wXCkP
xmW+8Oi7rGvsAuAvnzuN0+vK81pX1Q3+sLKB37y2B4AvnlfHnRfWH5O7Fl0JlUt+vRJFN/C57bz8lXOG7aT5eBkNMUczDB5a1cCv/5n+TG87t46vXVSP3Tb4ZxpKKHz9yc2sbQgC8J1n3uO06WWjooPV6I9zxb2rAPjT6v08cvNpw1KHJRCXuWn5elpCSQBu+dMGVn7rvEwHyx+T+eTv1rC3Mx2PXrzjbOZM7N0freEkH//t25gm1L7p4a9fOEPUhxGOidEQb0ba7vYo19y/GoBF64v5/b+dQlmeJVJ6fmNkzaDIbeOVr5yb92+MPypz9f2rae5KIknwylfOZUbl8Y+Do1UgJvO5h9ezuTkMwNOfP4OTp5Qc51YJRyLfs7+bgN8Bn+z+9yDwLeBtSZJuGKG2CUfJMEwOBBOZ501Zj4+nlq5kzvODoRSa0VuPrS2SoudpaziFnrVM0Q12tkczz7e3RnPWPRqabmY6owC7O2J5r6tque3a2RZF1Y1haddgwkkVpfu1wkn1mL3uCDvuMUfVTHa25X6mcp77tqcAZrZAbFjLBh6xxkBvHGjwxzGG6fgxTWiPpDLPNcNE1nr3l2GY7M967f194lFzV5KesozNXYlha5cg5OG4x5uRtrez9/dsnz+Grud/fMVkLXMsR5Iaipb/b4xumplYaJrpYsjCoelG7nlI9ucmjC35drBswBzTNK80TfNKYC7p8cmnkQ5CwihU4LTxsyvm43PbOWtGGYtrR8dVkFOnleYMtfrcOdModvc+n1lZyAWzKyly2/jBpXMpyBpi5HXa+N5H5zLJ52JqmYevXlSPI487Cvnweez8/MoFlHjsLJpczIfmTch7XY/Txrcuns3kUjc1JW7+/cNzhq3S/WAqvA6uWFJNkdvGl86bMV6KMR73mON2WPlm1mf67Y/MznvfFnscfOEDdZnntaUeppUXjFRTh2TZ9FKWTS/F57bzi6sWDNuwYY/TymfOmJp5fvKUkpz9VeC08fMr0/HojLoyTukTj+bX+DhnZjk+t52fXD4/57gXhBF23OPNSDtvdiVLakso9tj5xZUL8bryP75KC+xcfUoNRW4bt547fUjrFjit/PjjJ+Fz2zm3voJ5k4qOpPknDK/Lxi+uWkCxx86S2hLOm1VxvJskHCHJNAe/iiFJ0vumac7Nei4B20zTnCtJ0kbTNBcPR2NEpfPhl1A0YikNu9VCyTGaEzQYwzDxx2Q2NoWoKXFTXeym2JPbtlBCQdEMCpy2fidaumESiMtISJQXOkh/HYdHStWJHGHyDdNMvy8TKC9wHtMJ/uGEgqwZuB3W4z7PLt8q54NsY8RjTj7xpuczBSgb4mcaTqr4YzKBmMK08gIqvKNnuFswLqPpJoUu27BeCAjGFToiKZKqzuRST78hfglZIyZr2KyWAeezdcXTSUGGu13C+Ha0MWe0xJuRFowpaIZBkduOy24d0rrhpIqs6kf0G9Nz3I+m85DRLKloRFMaNouF0kKxv0abfONNvr9gr0uStAJ4svv5Vd3/VwCEjrCNwjHgcYy+ExWLRaKyyHXYO0R9O1zZrBaJSu/IzDFy2a1D/uHpIUlSZq7YseY7zP4ao0ZFzDmaz9TntuNz26kbhRcgS/OcezH07ToOmwjE47TlJL3oS5x8CcfJqIg3I+1oTtZ9bjsc4RzSwY57IZfbYcM9ys7bhKHL9xP8InAFcFb38+XA02b69tcJk4VHEIRjRsQcQRCOFRFvBEEYVnl1sEzTNCVJWgkopMclv2PmM7ZQEAThCIiYIwjCsSLijSAIwy2v7ACSJF0DvEP6tvk1wFpJkq4ayYYJ45dumHREUzQFE7SGk0RT6vFukjDKiJgjCMKxIuKNIAjDLd8hgv8BLO2pByFJUgXwKvDUSDVMODZ6JpUfyaTXIxFOqry+s4OfvrCDtkgKiwQXza3iBx+bJwqLCtlEzBEE4VgR8UYQhGGVbwfL0qfYXoD8U7wLg+jJXCdJQ8+Kl85y
pmCaJsUeOw5bbydJ1XU6Igqv7+xg1gQvdRWFmUnkmm6wrzPOf/xtK42BBB85aQK3XzBzSIU9ezL92W0WSvokWQjEZLY0hwnGFc6aWU5VkQvDMHlzVyd3PLYp83eGCf/Y1s7OtihP3nYGFV4noYRCoz/OewcjnFtfQaXXiXMInb+edh0qU1lnVMY0+2dQ0w2TzmiKN3Z1UlPiYc5Eb79kAD3Z11x2a7/q6ppuEEykax2VehzYjkGR4WPBH5MxDBOPw0bhENLzHqVREXN03SBwmM+0J7PWob5rI9au7kyaACUeO3br8F0ckTWdcOLQ8ajnoozDZumXjCY7k2ax2zGkEgrhpEprOMm+zjhLaksoL8zd34qmE0qoSNLQMzoKwiBGRbwB6IimkFUDj8PaL5NtQtEIJ1VMM510om+G3a64QlLVsVv7J+dRdZ1gPB2vCl32YxqvxqNIUiWSUrFI0oCfxWB6zkO8bhtuu0hoMR7l+6m+JEnSP4C/dD+/FnhhZJp0YlF1nbX7Atzypw2UeBw8cdvp1JZ68l6/wR/n2gfWEEtp/PHGpZwypSRzUtIRUbjwV2+QVHUAvv3h2dx45jQcNgvBuMJV960iktIAWL56P6YJ3/7IHNyOwU/WgnGZHz+/nWc2tvCheVX812XzMz8G4aTCD/6+jee2tAIwocjF328/Ewn4+Us7BtxeYyDBtoNhzp5Rzgtb2/jOM1sBcNosvPyVc5hSll8Noa64wi9f3smj7xzg3PoKfnn1wpwfqZZQkuseWE17WObX1y/iA7MqM3fu/DGZi//vLUKJ9JDFG8+cyjc+OCuT/cgfk7n9z++ypiHIzWdN44vnzcg5wdzZHuUTD67FxOQvNy9jXrUvrzaPZh2RFJ/6/Vr2dsb5fx+bx+WLq49VfaJREXN2tse4/sE1mJg8etMyTqouynQ4QgmFe9/YywNv7uPUqaX89pNLhnSB4mjs88e47v41yJrB8s8uZdHkEqzD0OFQdZ13GoJ87uH1FLsdPH7rspxjLxCT+f6z23j+vVY+tnAS3790bs5FiMZAgmvvX000pfGHfzuFpVNL87rQYBgmr+3o4M7H0xdfitw2Xr7zXCb40ieJum6wqSnEv/1xHQUOG4/duoy6isKjfr+C0G1UxJu2SIqr71tFUzDJqdNKuOeTJ2diiqLprNzt5/OPvotpmvzvtYv54LyqzO9XICbzvWe38fzWVqaWeXj81tOpKurtZO1uj3PlvatIqjqXLpjEDz8+T2TtPEJxWePP7xzgZy/uwCLBPZ9cwvmzK3MucB9Oc1eCa+9fgz8mc88nl3D2zPK81xXGjryu0Jim+Q3gAWBB978HTNMcF8X3jrdwUuO/XtiOrBm0RVL8aXVj3uuapskDb+6jMyqTVHV+8sL2TIcJ4LWdHZnOFcCf1uwnnExfjW+LpHL+FuD5ra1E5fzmQ8VSOn99twXThJfeayec7F0vqRis2Nqaed4WSdHSlUTRDJq7Dl3F/Y2dnXQlVZavasz8n6wZrNhyMK82AcQVjUfWHsA04fWdnXRG5Zzlf9vYQlMwiaIb/GjFdiJZ7d7aHM50rgAee6eJmNK7j5q7EqzeF8Q04cG3Gkgovfs2oWj86pVdhJMqkaTGL1/ZRVzO3b9j0dt7/exqj6EbJj9a8T5x5di8p9EQc5KKxl2vZn2mL+/Mef8JRef+N/ZhmrC2Ich+f/yYtEvVDX7zrz0E4goxWePnL+0kNkzzGCNJjf96fjspNR2Pso9FgEhKY8XWVkwTnt10kGhWDDFNk9+9tY+O7nj0Xy9sJ5Jnu0JJhd+vbMhpx9qGQM7r/uzFHSQUnc6YzH2v78UwRA4CYXiMhngDsK4hSFMw/Rv5TkMXXXElsyyaSv/G6IaJYcIvX9mZc/zFZY3nu393GwMJ1jUGM8s03eCBt/Zmzgee23Iw5/dLGJq4rHH3P3cD6VE4v/7nnpzPYjCPr2uiJZRE1gx+/Px2wsmxf64g9Jf3LXDTNJ82
TfOr3f+eGclGnUicNgvza4ozz0+ZUpr3upIkceq03r9fVFOMK2tITn2VN+fv6yoKcHZfJek7pA+gttSD3ZLfV8Jpt1DiSQ+R8/YpBmy1SEzqM5+qvNCJzWrBcZir2ZNK3LjtVmZW5V6Znjsx/ztBDqslU9DVbbf2u0K3aHLvvp5fXYQza3/1vXM4vaIAa9bwqEqvK/P31cVu7NbeZQ6rhSW1vds+eUpxzrbHqllVRfTsgnnVRdjy/H4Mh+Mdcxw2CydPKck8XzKlBGfWUDy7VaKmJP09d1gtTDhGcwjtVgtLp/Ye94snFw9pCO3hOG0WFtT0Hm+nTM2NRx6HlaLuYaK+PvM2JUnKadeimuJMvBmMy2Zl1oS+8ao3DrjsFhZmxcml00rFEEFhWB3veAP9v/PZBX2dNgtzJhZlns+Z4M0Zguu0Wyn29P599rZsVgvzq3uPn7ICR87vlzA0NqvEjMre/TtrghenPf/fxsW1vb8rC6p94+JcQehPOlwmUkmSoqRTlvZbRDqzadEAy47YaKh0fjwE4zIbD4QoK3AwrbwQnyf/Yn6hhMLezhiRpMbCyb6c4TpdcYXH1h3gkTUHqKss4GdXLGBScfokMJxUuOe1vdz/5j4ACp02nrj1dOZOyu8j1Q2TtkiKLU0h5tf4mFDkygwFMgyTPR0xvvX0FkJJla9/sJ5z6yuwSBLff24bT65v7rc9q0XizW+cR3WJm7ZIiv98ZivbDka4+uQa/u3MqXkXRjXNdLs2HQgxr9rHhCJnzq33SFLlQDBBazjFktrinOGD4YTC81tbufeNvUzyufn5VQuYmjU8SlZ1WsMptrdGWFxb3O+EuiuusL0tgmnC3IlF42L4RUxWaelK0RCIc3JtSabzejj5Vjk/xLrHLObkE296PlPDgHmT+n+mbeEUGw90MWdiERN9rmHr6AwmlFDY1R4lpeqcVF08rPMpgnGZTU1hSjx2ppUX5AyD1XWD1kiKLc1hFtb4mOBz5wxNzI1HQ2tXRyTFT1/cwfbWCJ85YyofPmlCzmsH4wpbmkMUOG3MrCw8bDFy4cRypDFntMWbaEple2uElXv8XDJ/EtPKPTm/X4GYzHObD6IZJpctrs4ZkqwbJs1dCV7d3sHJtSXUVRbkdNCCcZmX329nd3uMG06fQm2JR1ykOArtkRQPr2qkwGnjmqWThzQ8PJxQaQzE6YzJLJ5c3G+unTC65RtvDtvBOtZO1A7WSFI1g1BSwWGzpiuxZwknFCIpDX9MprrYTUmBA/swJmYIxhV0w6C0wJk5CeuIpLjxoXVsOxjJ/J3VInH39Yv5wKyKTNKJSFIlpen4XPZjdtIKvYkq7Nb+iTuE/BxNB+tYEvFm9InLGklVp8RtxzpOksQII28sxBwRbwRhfMg33ojUJeOc3Wbpl02oh8/jwOdxMHkISTWGYqCr15VFLpZ/9lQa/HHe2t1JpdfFBXMq8bnsORn9itx2isj/Tt5wsVktVB5ifwmCMLIK+gw3FgRBEISxSPySCcdceaGT8kJnznwNQRAEQRAEQRgPxBgMQRAEQRAEQRCEYSLuYAlHTNUMYrKGx2nNO1tYj0BMRtYMrBapXzYyQRDGJqO7ALJppjOV2gfIjmUY5hFPrjcME0liSMXYh7JtMelfEARBGA6igyUckfZIij+83cC6hiCLa0v43NnTM0VBD0fVDXZ3xPj6E5t5vzWCx2HlM2dM5XNnT8+Zs+WPyRiGicthpSgrE5JpmnRGZba0hLFbJOZO8uWV2e54i6U0AnGZTQdCzKwqZKLPPS6yDI53gZhMR1TGBCq9ziEXEg7EZHTDxOc+tslaBhOMy2iGSZGr/8UNVTfoSihYJKnf+03IGuGUSnNXkpoSNz5379zJrrjCS9vaeODNfSQVnStPruazZ07LZMjqycj49IZm6id4uXxRNZVFuTGjK66g6gZelz2n4HlM1mjpSvLImkbcDhs3LJtCVZ8MoYPxx2QM06TY7chJb92TkfGJ
9c3UVRRwxZIaKr3OEenEHapdpmni69MuQThRxGUNRTModNkGTLSVVHWskiSOD2FMER2sUaAzKvP3zS1Uep2cNaNiWE+8W0JJnt3UwuLJxcyb5KPInX/iCH9MZsXmVrxuG+fNqsx0gDqjMlfft5oDwQQA7x4I8fyWVp67/cxDJtToEYgpXH3vKuLdRQ4Tis69r+/FY7dy67l1OGwWmoIJbvnTena2Rbl04SS+99G5mZO09ojMpXevpDOWLiA8udTN058/I5OYQtMN9gcTvLi1jXNnVTCjogB3VvKMrrjC2oYgTcEEly2e1K+97ZEUf9vYwozKQk6eUpKTCjoua+zuiLFyt5+PLpjI5FJPTorqYEzmpW3tAHxoXlWmzbpusHJPJ59/9F16knbeeOZUvnJhfebzCCUUNuzvYk9HjMsWV1NVJBJtHG/+mMzNy9exqSkMpNO0L//sqXl3shr8cb7053dpCSW57dw6rls6edhSiwfjMv/c3kFS0blkwcQhpfltCib40p/fpTGQ4MYzp/KZ06dmYk40pfLK++389MUdFLns/N91i5g7sQiLRULXDVbtDXDrIxvQDRObReL3nzmFs2ZWYJomf93YzI9WbM+8zm9f20uDP85Pr5iPw2pl+epG/vfV3Znly99u5G9fOjNz7B4MJbn9LxvZ3R7lE6fVcss5dZmYs7U5xCd+tzZz/Cxf1chLd57DtPJ0GQVV12kMJPjHe+2cP7uS6RUFmY6jaZpsb41yx2Mb6UoofPNDs/nw/Al4XXZkTeepDc38+Pnedv/h7UZW3H5W5hiMpTR2tEVY2xDk0oWTmFzizul8BWIyL2xtxWGzcNHcqrxLSgDsbo9y+1820hGVueOCGVy+uGZIMVoQ8hVKKGw8EGJnW5TLFk86ZnX7BuOPyfz0he3s7YzzxfNmcEZdWSbRjWma7A8m+NmLOyh22/naB+sHPcfIFowrvLm7k2BM4WMLJ1E+hIux0ZTK+wcjrN/fxccXTaKmJDcZWCAms2JLK4VOG+fNrhzWUhnC+CAuBxxnkaTKt/+6hR+t2M7tf9nEK9vbh23bnVGZa+9fzS9e2sn1D66lqSuR97oxWeVHK97nB89t42tPbOaZjS2ZZXs6opnOVY+2SCpzIno4b+3uzHSusv1uZQPhpEIkpfLdZ99je2sUw4RnNx1kzb50RXrDMHlkTWOmcwXQFEzyyvu9+ywQV7jsN2/zPy/v5PLfvk1XQs15nTUNAW57ZAP/9cJ2vvzYJroSSu+6MZlbHl7PT1/cwU3L17OlOff9BOMKV9yT3vZl97xNIKsdqmZw3xv7+M4zW/nOM1u57419qJqRXi+h8P2/byO7IsJDqxqJK73V27e2hLlpefq1b3l4fc62heNjU1Mo5zu97WCEtd3fxcF0JRS+/uRmth2MEEqo/OzFHXRGh+czNU2TP689wDee2sL3/r6NX7y0k2TWd+lwIkmV//jbVjY3hwknVf731d0cDCczy2Mpja89uZnOqMzezhi3/mkDgXj6GAkmVL79zFZ0I/1F1gyTf//rVoJxma6Eyr2v7+33ei++10ZC1omk+i8/GE6xem8g/brd8WbD/i4iKY373tjHPn8s/bpxmZ+8sCPn+JE1g9/8azcpVe/+G5WPdx/3l/32bUJZx7U/pvCFRzewuyOGP6bwrb9uIZpK76+efZCtMyrzalYc7oimuPr+1fz3P3ZyxT1v4886NlOqzl2v7uK7z27jW09v5aG3GzGM/Eqf+GMydzy2iR1tUYJxhe///f2ceCQIw2l7a4QbH1rHz15K/76Nht8YzTD47Wt7ePrdFjY1hbj1T+sJJXt/s/0xmU88sIaX3mvjsXVN/GjFduJyfrEO4IUtB7nzsU38cMX7fO/v24im1MFX6nYwlOLaB9bw3//YyVX3rs6J3wlF4ycvbOf7f9/G157czBPrm/LernDiEB2s40zVDVpCvSc4DZ3xYdu2aZq0R1KZ523h1GH+uk+7NJOmYG+7
9nXGMicOPSdcfeVzAnmodaMpFZN0R6XvdlpC6c6cYZq0DvAeWrP2n6abRLsDsGaYxPoE4wOB3o5hS1cSTTcyz3XT5GDW9pv6dCJDCYWec6dQQs2caKZfy2B/sPezawzEUY30tk2gK54b2E0TFK33tbP3dUsoiT6K6tOdqNoH+K5lH0+Ho+kGHdHcvw0n8/9xPxzdMGnw937XDnQlUPT8vi+KbtAezj2+/LHeY1LWjJyOTM/wNUjHk1CfDkCw+3g2MQkl+r8/00y/pm6YyFnf9x4d3ce6qpn99m1HJL1MM8wBOx6dURm1+/hVNINE94UbRTdI5lzEMTOv09Omnr81DHIudPTIjpWBuJLZJ4G4QlbIQNWNnGO3wR9Hy7ODZZhmv+9I33glCMOlpavPb0ye39ORpOlmzrFmmORcLDLJjU9tkVTOb/Zg9mbFyaZgIhMv8pF9bHZ2Dy/uoWoGzV3Z50dxdCP/bQsnBtHBOs6KPQ5+fuUCppR5WFJbzGfOmDps2/a67PzvtYuY5HNx8bwqFk0uHkK77Pzk8pOYXl7AghofXzxvRmYC+JLaEmx9JoNbJDh7Zvmg271gduWA/3/+7EpcNiulBQ7uuGBm5v+L3DY+umASkK5R1Xf/WCS4fElN5rnXZePbH57NRJ+LTy+b0m841+WLqzltWimTS93cdc3CnCFbxW47/3ftImpK3Jw5o4wPzpuQs251iYdrTqlhos/FDz82j0JX79BDt8PGtz8yh9kTvMyq8vKfl8zJzE0pcNi4fEl1zrZmVBbm1Pu5aG4VZ80oo6bEzf9dt5hiMUzouPvA7Epc9t4Q6bRZ+OC8qrzWLfE4+OqF9ZnnMyoLmVJWMCztslktfOWieuZNKqKuopAffmxevyLih1LqcfDVi+rpGeE2pczD3IlFmeVel43TppVknn/xvDq83XMgC5w2rjq5Jmd71y6dTIHDhsduHXDfTC5143HYcDusLJ1akrPMIsFFc9LrlBQ4uPPCenrCyiSfi1OmpP++2O3gyiW5rwvwyWVTMm3zumx89aJ6Jvpc3HzWtJzj2uuy8+WsmLJkSjElnvR6bruFc+or+m374wsnZR7XVRRy+eJqJvpc/OyK+TnHvddl5/uXzmVmZSFzJxbxzYtn5z1PpNjt4M4Ler8j8yYViaHBwog5d1Yl59SXU13s5tfXLR624cpHw2W3cseFM/F2/xZeMLuSkqx2FThs/PDj85AkKHBY+c9L5mSO+Xzccs50Fk0uZmqZh59fOZ9id/7vee7EIi6ZP4FJPhe/vHphpo2QrtP5o8tOoq6igJOqi7jjgplYLeJ0WsglmaPoSvmJWulc1w2CCQWrRRrS+P18pFSdaErFYbXgG2JA7ckIZpGknDkeSVXjnYYgX3tiM/6YQonHb+601wAAIABJREFUzs+uWMBZM8socB4++IUTKr9fuY9f/2tP5v8m+lw8edvpmTHO0ZRKIKbQ1JWgvspLeaEzM9cpmlLZ2hzm7n/twWaV+NoH66mv8uYUKY7LGnFFw223DhiMu+IKmmFS7LH3m1CraDqhpIrdaskJ9D0iKRVZ1Sl05k7C79EzfKhvxy4Qk3ls3QH+sa2dk6p9fPn8mf2SgnQl0hP8i932IU3eH23yrXJ+vA0WbxRNpyWU4r7X92ACt51bR3WxO+9kFdGUSiihEkmpVBW5hpwgYzDpu0tQXugYUkKGaEolnFTpiqtM8Dn7zWnoSezhtlsp9thzTsR65hm+vrODC+ZUctHcCZm5B23hFDctX8e2gxEgnRTkkZtPY2ZlIZIk0RZO8p9/e4/XdnZSXezmhx+fx9KppZkLDXFZI5RUCcRkJvrcOclrAjGZ3762h7+804TTbuELH5jB1afU5ByjMVkjcYjjPpxQ6EqqJBWdSq8zJ561R1L84LltvLKtnaoiF9+/dC5n1JVRmLWNSFJF1nQKXTbc9typy6ZpEogpIPU/7gcTTqqEEwoxWaNyBL4jJ4qxEHNGw/lNqPs3
xjeKfmM03SAQV1A0A4/TSlmfc6C4rBFLaUhS+kLMQEkwDicYTycaKi1w5syZzkc4mW5XodOWM5cbes6PFCwSQ5oDK4x9+cYb0cESjoimGwTjCrJm4LRZKCmwY7fmF7AjSZWuhML6xi4m+FzUV3mHnAkwnFCQJGlMTQjXdINISsPtsOIeRRnlhttYONmB/OONqqeHzYkMVmmmaZJSDVx2S7+OnT8mE0tp6QsFHjtlBc6c1OeRpEpK05GQhtwxTCo6UTk9DLFvJsCjFU2lO1+SJFFW4BDp2seYsRBzxPmNIIwP+cYbkUVQOCI2q6VfiuV8FbntFLntRzVkaqh340YDm9UiMg2NQUO9YjreSZI04N1bSN/BOdxdmCK3nSKO7KKI22E95OseLa/LPqShR4IgCIJwOOLMQRAEQRAEQRAEYZiIDpYgCIIgCIIgCMIwER0sQRAEYVSIplQ6o3JOCQNBEARBGGvEHCxBEIQjFEoopFQDu1Ua9kxSgbiMqpndSWSGNncvnFBIqgY2q3RMM9MFYjJbW8JEUhqnTy/tl6Ew3J3Jr2+7TNOkuSvJD1e8z77OGB9bVM0Ny6bkzFlMqTrhpIoElBU4sA5hbpxhmPjj6ayLXpctJ+vo8WSaJv6YgmGaFDhtFDpHR7uE46czmiKlGngc1n4xRdX1TE3FYk//TICDxaPOqIxhmHicA2fYPZxATEbVTRy2/nOJe5JemYDPbcc1xCROPTXtPA5rv/Tx4YRCJKVhs0j4PPZ+x24gJmeSbfV9z3p3JmTTTJd86ZsBNJJUSQwQj3r4YzKabuJ2WPANIcU7pLMVy9rI/DYIY4OI5oIgCEcglFD4n3/s5JG1BzipuoiH/u1UyoeYDfNQ/DGZLz76Lmsbgnx0wUR++PF5eZdwCCcU7nl9L/e/uY/6qkIevfm0fh2dkRBJqvzguW08t7kVgLqKAh6/5fTMPokkVZavauBXr+xmWnkBj92yLFP3yR+Tuf7BNZninXe9sgu7ReKWc6Zjs1rQdIN1jUFuemg9LruFJ287nVkTigZuyAAOBBNcee8qQkmVX1+3iAvnVuEcBWmqW7qSXHHvKjpjMv991QIuWTCx30mgcOLoiKS46r7VHAgmWDa9lN98YknOif/21ijXPbAGwzR55KbTWFJbksl4GUoo/OqVXTy8ej/zJhXx0I2n5mTnbQ0lufr+1TR3JfnBpXO5+pTJObUYD8cfk7n9z++yel+QS+ZP5IeXzctJp76nI8bV961G1gz+eONSlk0rzfsCyMFQkivuWUVbJMXnzp7Gl86fkenMxFIqD61q5K5Xd2OzSDx041LOqCvPvGd/TOa2RzawvrGLpVNLuPdTJ+fsr0Z/nCvvW0UspXH/DSdz9szyTKc0mlJ5eHUj//PyLqaWeXj81tNz6tB1RlN84sG17O6I8YUP1HHrOXX4PPl1SoNxme/+bRvPb23l9Oml3N3ncxRODGKIoCAIwhFIKDqPrD0AwHstERoD8WHbdjCusLYhCMCKLa3EZT3vdVOawf1v7gNgV3uM7a3RYWvX4SRVnRVbWjPP93bGicpab7tUnf99dTcADf447+7vyixLKHqmc9Xj75sPEk6mr9ZHUxp3vbILpbvUwYNvNaAb+Q8jfHxdE4G4gm6Y/OqVXUSS2uArHQPPbTlIRzR9hf2XL+8imhod7RKOj/dawhwIJgBYsy9ILOv7kFR1fvvaHhKKTko1+PU/d5NQcpc/vHo/ANsORmj058aj13d1Zo6xX768i7ic/3etK66wel86Hj2/NTceabrBfW/sIyprKLrBXa/syjnuB/PGrk7aIikAHnyrgZTae1wnVJ0HumOZZpjc/+Y+4lnvuSueLvcCsK6xi1BCzdn2Q6saCCVUtO7jPvv4Sio6d3XHo8ZAgvWNwZx132uJsLsjBsA9r+8lpeUfg2OyzvNb07Fw9b5gv3YJJwbRwRIEQTgCdquF6eXpUgMuu4XqEvewbdvntlPkSl9dnuhz4RxC
zSebRWL2BC8ADquFaeVHXg5hKKwWibqKwsxzl92CJ2uokNUisXBycaaNs7rbCOC0WXHZc9/j7AneTFp2l93KadPLMsvOmVmO1ZL/Pjm9rnfdpVNLcdtHx0/f0qmlmcenTCkdFXfVhONnekVhphiuz23Hk1WWwGG1cOaM8szzM+rKc4qe2ywW6ioOHY8W1PjoKe+2ZErJkMpPpEurpOPRhCJXzrFqs1o4p763XafXlQ1piOD8ah895fBmVXmxZdWgs1ksnFTtyzw/eUoJrqxjpMhtx9t9F67IZcPryr0jd/bMiszjU6eW5rTLapFYXFuceTy7zx3xuooCHN37aO7Eopx2DcZls1BVlL5j5XPb+7VLODGIQsOCIAyrsVD0E4Yn3nREUuxsjzK9vIAKr7PfnIgjpekGHVGZvZ0x6qu8OUNX8tEZldnZFmFKWQGVXmfOidhIagom+MkL2wknVf79w7OZVeXNeW1/VGZHW4TaUg8VXifu7vkUSUXjtZ2dfP3JzSQUnbqKQv5006lMKu49SeyKK+ztjOFxWKku9uQ9XAfSwxPbIim64gozq7yjph5dNKXSHpHxx2RmVhaKuRpHaCzEnHziTVLRaO5KsqkpxOnTy5hY7M50uCA9DPBAMIFumEwtL6Ckz3yljmiKXW1Rppanj/vseBSXNTqiMq2hJLMmeIf0XdN0g86ozJ5DxKNwQqE5lCTZfewOZc5oXFZpCaVo8MdZXFtMZZ/hzJ1RmZe3tVFc4OCM6WU529Z0g7ZIiq3NYRbUFDOhyJkzNDGcVGkNJYmkNGZUFvY77v0xmZ1tUWpK3FRmxSNI33HviMocCMSZPaFoyMO/2yMpdrVHmVFRSIXXiU3UUxw38o03ooMlCMKwGgsnOyDizUiJyRq6YeJzD20SfU8SC0UzcNutwzafTRj/xkLMEfFGEMaHfOPNiN23lCTJBbwJOLtf5ynTNL8/Uq8nCMKJS8Sb0eNIM+G57NYhZx8ThONFxBxBEA5nJAeGysD5pmnGJEmyAyslSXrRNM01I/iagiCcmES8EQThWBIxRxCEQxqxDpaZHnsY635q7/43esYjCoIwboh4IwjCsSRijiAIhzOis+4kSbJKkrQJ6ABeMU1z7QB/c4skSeslSVrf2dk5ks0RBGEcE/FGEIRjabCYI+KNIJy4RrSDZZqmbprmIqAGOFWSpJMG+JsHTNM8xTTNUyoqKvpvRBAEIQ8jEW9U3aAjmqIjmkLV86+DIgjC+DdYzDnS85vBko+NpuRk2Y6mXYdbd7S+X0E4nGOSnN80zZAkSa8BFwPvHYvXFAThxDRc8Sal6ry9x883n9qCYZr89Ir5nDurArd9fNc06YimeGT1fiq8Li5ZMHHUpDQXhNFquGJOJKmyrjHIP7a1ccOyKdT3LXMQk/nDygZ0w+Tms6dTMYRMm6GEwpu7Onm/NcqnT5+SUwJhMAlZY3tbhJfea+PyxTXMqCzISQEfiMk8vHo/0ZTKbefWUTmEshJdcYVXtrfx7v4Qt5wznallBViyUtO3hVPc98ZeygsdXH9q7ZDSywfjMis2t+KPK9ywrJYKb/7tCidVNuwPsmZvkE+cVsuUMg+SlH8tLEEYySyCFYDaHXjcwEXAz0fq9QRBOHGNRLwJJ1W+9OeNJNX0nas7HtvEm984D7dv/HawwkmVbz29hdd2pIczmabJp8+YenwbJQij0EjEnI6ozE3L06ncn910kDe+cR4TfOmOTErV+Z9/7OSxdU0AHAwn+ekVC/LO2rmlOcyXH9sEwL92tPPnm5flXQohlFS55v416IbJn9bs5/VvnMeEonS7NN3gt6/t4Q9vNwLQGEhw17WL8i7TsLk5xDef2grAK++38+KdZ2dqYXUlFO54bCNrG4Lp1zJM7rhgZt4dnWc2HuRHK94H4P2DYX51zSKK8mxXS1eCzz60vns7Lbxwx9lD6tAKwkieKUwElkuSZCU9FPEJ0zRXjODrCYJw4hr2eGOYJimtd1ig
rBkY43yoiqYb+KNK5nlrOHkcWyMIo9qwx5xoSs08ljUDzTAyzzXDpCMqZ577owq6nn888sey1o0pQ4plKVVHN8zuxwaa3tsu3cxtVyAmo2e1ezCBWG+8CSdVspul6SaBeHY8SmGYJtY8O1htWfHLH1Ny9udggvHez6IroYhhisKQjWQWwS3A4pHaviAIQo+RiDcFDhs3njE1c2X2U6fV4nGO7zpNpQUO/ufqhXz1iU2UFjj4tzOnHe8mCcKoNBIxZ0qZh+uWTubNXZ3ceNY0ily9d1sKnTa+d+lcWsNJNN3kx5efhM+TfzHvc+or+Mj8CezpiPHjy06ipCD/dUsLHHz5ghk8vyU9dDG7XU6blW9dPJvGQJykovOLqxZQ4sl/WPG5syq4YHYlO9qi/Mclc3K2XVrg4K5rFnH7X96l2OPgzgtnYrXknzrg5rOn835rhFBC5b+H2K65k7xcu7SGDftDfPNDs/C6x+/IBWFkSKOpVy4qnQvC2JdvlfPjLZ94E0ooRFMapglet21IP9BjlWmaBOMKVotE8QnwfoWxbyzEnHzPb6IplZSqU+Cw4Rlg+F/PnaiyAseQ5wRFUiqKZlDstmOzDi3HWULWiCs6hS7rgPNQAzEZw0y3K3sOVT5CCQVFMyhy2/sVG9cNk2BcxiJJQ5p/lb1t3TApPYL9FUuppDSDIpctZ86ZcGLLN96ILrkgCMIhFHscJ1wnQzrCExlBEI6e12XH6zr03aXyozg2iw6z3cF4nAN3+HocTcw4XIy1WqQhJacYyrYHU+iyU3jEawsnuhFN0y4IgiAIgiAIgnAiER0sQRAEQRAEQRCEYSI6WIIgCIIgCIIgCMNEzMESBEEQMjTdoCuhYpGObl6FIAiCIJyoRAdLEAThEGRNp6u7Hkqxp3+Gq/GmK6GwYvNBHnyrAa/Lxvcvncv8ah9uR34/FapuEIyna+wUuewUZE2KV3Wd9rDMi9vamDOhiHmTiigpGL4EIoGYjKwZOGyWo0oEMJDOaApVN3HbrcPa5mBMYcP+IC2hJBefNIFKr2vIGdiE8aUzKpNSdTwO67Be4NANE39MRtUNCp22ISd/OFy7UqpOKKGmj3u3vV/x40BMJiZruOxWygudWLO+411xhV3tUXa1x7hgTiWVXmcmw6FhmLRHUjz1bjMlHjsXnzQx59g2ut9TIK5QVuCgvNCZc/wkVY1QXEUz0u3qW/y4K64QkzWcNgsVXme/LIM9x73HYe23v5KKTjipIEkSJR57vyyD/phMUtFx2a39ChSruk4wlm6X12XrV/w4nFSJyxo2q0RFYf92CWOD6GAJgiAcwuamEDf8/h0M02T5jaeybHrZuD4B3tYS5rvPbss8/8SDa3nzm+fl3cHa0xHjqntXkVR17r5+MRfNrcqceHRGFS66602Sarp483c+Mocbz5yKfYjpogcSiMnc+fgm3trtZ3FtMQ/ecArl3uE5Oe2Mprj2/jXs88e56uQa/uMjc4alkxWXNX716k4eWXMAgF//aw8v3XE2lUVHnjFNGNs6oimuuW81jYEEZ9aV8evrFw9bJ6upK8HHf/M24aTKzWdN4/YLZvbrcBxKZ1TmEw+uYXdHjJOnlHD/DSfndHQ2N4X41O/Xohkmv7pmIZfMn4TDlj6uAzGZrz6xmTd2dVLisfP3L53F5FIPkL5b/szGFn644n0AfvGSjVe+ei4TfOljwB+TueTulQS7iw2v2NLKvZ88OXP8dURlPnr3W/hjChWFTp67/azMugAbD4T49O/fQTNMvvmhWXzmjKmZiz6hhMLPX9rBY+uaKCtw8OyXzqSmxJNZtz2S4qr7VtEUTPLpZVP46gfrM50sVTd4e4+fWx/ZgMNq4YlblzG/pjizrj8mc8uf1vPu/hD1VYU8evNpOZkQ93TEueKedJz8zkfm8MnTajPtiqVUHl7dyC9f3kWF18nfvnAG1VntEsYOMQdLEARhAAlZ44E39yFrBqpu8sCb+0io2vFu1ogx
DJO/bTqY83+aYbK9NZLX+qZp8vDqRuKKjmHCvW/sJSb37q+NB7oynSuAJ9c3EU6qw9L2uKzx1m5/9+uEhm27kO407vPHAXhqQzMpTR9kjfzEZY1nN/bu72BcoSWUHJZtC2NTQ2ecxkACgLf3Bogrw/NdA3jpvbbMcfHHVY3IQ/get4aT7O6IAbBhfxexVO9xnVR0fr+yAVU3MU148M0Goqne4y+p6ryxqxOAroTKyu7jFNJ3ap7c0JR5HpW1nHjTEZUznSuANfuCOe3edjCMP5Ze3hmTc9ZNKhp/fLsRzTAz7zmeFY9kzeCxdenXDsSVTPzI3nZTMH08PrxmP7JmZJbFZI17Xt+LbpgkVZ0/vN2IZvQuj6Y03t0fAmBXe4z2iJyz7UfX7s/Ewt+9tY+40tuuhKJz3+t70+8pKvNmn3YJY4foYAmCIAzAabdy4ZyqzPPzZ1eO6yGCFovEsmml/f5/WnlBXutLksQH507IPP9AfWVOQdLZE4ty/v6UqSUU5HlnbDAuh5WaEjcAFYVOvK7hG5wxpayAAkf6c583qWhY7rgBOGwWFk/pveptt0pUibtXJ7TaMg+e7u9aTYkbl334TtGWTS+jZ6TZqVNLsFny33ZVkYui7mOq0uvMtBHAabNw/uzKzPOzZ5bjtucur+2+Y2WRYFFt73fe47SxdEpvzLFIML2iN96UFzqwW3tHDEwp82DNaveMykJs3SMK7FaJuoreqlVOm5VzZpZnnp82rRRn1v60WSQW1PiAdK2txVntAqiv8mb2/+La4szrAHjsVs6fXZF5/sG5VTn7s8DROyywyGXrN0Qwe3+dUVeG09q7v+w2C2d1t9tmkVhSW4IwNkmmaR7vNmTkW+lcEITRK98q58dbPvEmlFDojMqYpE8sxnvR4UBM5rvPvseL77XhsFr46kX1XHfqZHzu/N53JKUSiCkkFI1JxW5KsvZXLKWyqSnMA2/uZd6kIm46e/qwzpXqjKZo6kpSU+zuNxfjaCi6TjCm0BpOUVPi6XeydDTaIyl++689NIeS3HHBTOqrCvMejin0GgsxJ594o+g6nVGFBn+M+krvsA4XjcsaHVGZ1lCSWRO8Qxp6qOkGnTGZvR0xZlZ5qewzXymUSN99Tak608sL+w2hbY+k2NQUoq6ikIk+V87cTH9M5uHVjbx/MMKt59Qxd5KXAmd66GJS0Xi/Ncr//GMnRW4b3/nIHGpLPZnXTioaB4JJVu31c0ZdObWl7pzjJ5RQ2NsZI5zUWFjj6/ee/VGZne1RakrcVHideLLWVTSdQFyhLZxicqmnX6zqSqSXObvnfGbPozJNk46ozO72KHWVhVQU9s4rA4gkVdojKYJxhZlVXkr77K9ATKa5K0l5oYPSAoeICaNMvvFGdLAEQRhWY+FkB0S8OZRwQiGh6lgkCa/LlnPSMRyiKRWnzZqZo3Gik1Ud1TAodOY3H0bobyzEHBFvDk3TDRTdOGSsCScUrFZJHCPCqJBvvBHdYkEQBCHD53HgG8Hte13iJCmb027FyfgdeioIg7FZLTl3ePryjfORA8L4JC4hCoIgCIIgCIIgDBPRwRIEQRCGTTSlEk6qDDT8XNON9BBEZfxmYxQEQRAEMURQEARhnAklFBTNQJIkygoceSd8CCUUOqIyr7zfTqHTxoVzqyh25xYMPpRATGbjgRB/WrMfWdP52MJJXDS3igqvC717kvzj65pYucdPRaGTz50znWnlBZnEIaGEQlzWicoqpR4HXrctJwshpCfqWyxSTpayHrpukFB13A7rkDKkCYIgCMJwEx0sQRCEcUI3TPYH4vz7X7fyTkOQ6mI33/nIbM6eWZHJcmUYJv64jGmC22GlqHtOVDCeLrz5+LreujT/77lt3HXtIi6aU4XHaSOSVPHHZLYdjHBStY/yQgdel51ATOZrT2zm9e56N5CuWfPgWw08ddvphBIql9/zNpGs+jkvvNfGly+YyU1nTUPRDL7+5Cbe2JWu+eKyW/je
R+fy0QWTKHLbiSRVtrdGePCtfbgdVr74gRnUlnrwdHf8/FGZp95t5q3dnZw6tZTrT6ulMquwpz8ms68zTjipsqDGR8UAWQYNwzxsR/Rwy03TzMmq1iMup7MqbmkJU9+dfW2omSiPpl3C2HS4z1TT0/WWDjVn6UT8PohjRBiNRAdLEARhnAjEZK68dxVdiXShz5ZQki/+eSNPf/50Tu6uN7PXH+PGP67jYCjJ1adM5lsXz6K0wMn21khO5wrAMOErj29i5bfOx2a18NK2Nr751JbM8ruuXchHF0xic1Mop3PVo8EfZ0dblLte2ZXTuerx63/u5vqlk7n7X7sznSuAlGrwnWfeY3FtCV6XjVV7/dz2yLuZ5S9sbeP5289i9sQiAnGZWx/ZwIb9XQC8vSfA81tb+fPnllFe6CQQk/n8IxtY15heXuKx8/yXz2ZSsTuzvQOBOPe8vpcFNT4+fNLEnDTTkaTKyj1+Xt/ZwU1nTaOuojDn5LYzKvPAm3tx2qzceObUTCpo0zRZ29DFTcvX0TNa8jsfmcMNy6bgduSX1KKlK8E9r+1lZlUhH19UndOuaEplbUOQl7e18enTp1JfVYjDJpJljGXRlMq6hiAvbmvjMwN8pp3RFP/3z91ouslXL6rPSeOu6QZ7OmL84e0Gzp9dyel15fjc4zuhTELReK8lzBPrm7nmlBpOqvblZCIMxmUeXXsAf1ThC+fViTpzwjElxlEIgiCME++3RjKdq2z3vL6XuKwRSih8569bae5KYpjw+Lom9gcSxGWNh1Y1DrhNw4SVe/yEkgo/eWF7zrIfr9hOV1zh4TX7D9kml93K+u7Oz0DCKZWn320ZcNnyVY34YzL/++runP/XDZO7/7WbpKIRSWqZzlWPXe0x2iMpAFrDqUznCqArofLw6kYMI93r6YzK3PCHd3hsXRPfeeY9tjSHcrbVGk7xhUff5Yn1zVx172qCcSWzLKlo/OSF7Tz4VgO/eW0Pv3ltD5qRvsMQiCn8eMX7ZE9F+9UrO4mk+n8+A/HHZG5+eD2PvnOAHzz3Pqv2BXKWB2IKNy9fzxPrm7n6vtUDfu7C2BKMK3x2+Xqe7PlM472faVLR+emLO3hkzQEeW9fEd599j1jWdykYV7jqvtU8sb6Z2x55l85o6ni8hWMqnFC5/sG1PLWhmU88uJZwMvcYeGJ9M798eRfLVzfyzae3EBbHiHAMiQ6WIAjCOJFS9QH/X1YNdNNEN0wSSu7fJBUd0zRRNOPw2zX7bz+l6piYqPqh1zUGqbUowSHXTyg6pgnJAd5XQul+T4dYV+5+PwPtk2hKwyTdLtM0iWXdXet7py07IUdS1cl+N7pp5nSY0sk90o8NzH7tVnWTfEtPGqZJNLtdfU4es9+XrOmD7mdh9Eupvd9lWdPRsz7Tvt+HmKyhZ33kRp/jJC4PHAvGE81Ix7Sex5qeewyEEr0XQ6JJLWd/CsJIEx0sQRCEcWLR5GKcAxTw/dSyWopcdsoKnXz3o3NxdA9xW1Djo77KS6HLzpVLag653XPrK/C67Hz2zGk5/3/z2dMpdju4bFH1IdfVdIOZlYWHXO6yWzl/duWAyz5xWi2lBQ4+c/qUfss+d/Y0Cp12fB47taWenGUVhU5qStJDAKeUFWQeA9itEjedNQ1rdyKMkgIHv/vMKSyeXMx1SydzRl1ZzramlhXwhQ/UsbDGxwOfPoUid+8QpEKnnR9+bB5nzijjA/UVfPNDs7F379tSj4Pbzq3L2dY1p9RQ4MxvGF+Zx8H9N5zMktpirlxSwwfnVuUsrypy8dWLZrKwxsdvPrEkM5dOGLsqvU6+9sF6Ftb4uPv6xTlD/AqcNr5/6VxOmVLC4snF/PTyBTnLfW4793/qZBbW+PjSeTP6HRPjkc9t58eXncSCGh8/vuykfkMibzprGh+aV8Wp00r55TULKS0Q9bSEY0caKJXu8SIqnQvC2JdvlfPjbTzGm5Sqs/FAF19+bBOdURmH1cJNZ03l
lnPqMvN3UqpOKKGiaDoFTltmzpA/JnPbIxtY35g73O7z59Zx2wfq8LntBOMKW5pD/GtHBxfOqWJ+tY+SAgedUZnrHljD3s5YzrpFbhuvfuVc9gcSXPfgmszV5h4fmjeBn185n7iic90Dq2kKJjPLPnlqLV/70CxKCxx0xRX+urGZ5av247JbuOOCmZw1owKfJ31C1dKV4L9e2M66xi4WVPv43qVzqS31ZBJPdERSrNjSSkdU5tqlk5noc+HKykSoGyaRpIrDZhkwY2JS0UmqOl6XLdOByhZOKEiSlEkk0iOUUNjZFuWF99o4o66UpVNKKe3e3/kwDJNwUsVus1CeAWdtAAAgAElEQVQ4QLtSqk5C0Sl0Wk/o+VdjIebkG28G+0y74gomJqUF/b9Hqq4TTem47da85/mNdbKmE5d1CpxWnAPsr2hKRTfMISeXEYRDyTfeiA6WIAjDaiyc7MD4jTe6bhCIKyRVHafNitdlyyvNOqQ7WRv2B3l6Qwseh5XPnDE1J5X64XREUzyyej9PrG9G0Q0umlvJl8+vZ4LPhazpHAgk+MU/drBhf4jyQgc3nTWNi0+amLmq3BmVafDHaQklWTS5mBKPPed1Nd2gK6EiSVBW4OiXtS+WUkkoOq6szIjCiWEsxJzxGm8E4USTb7wRWQQFQRDGEavVkpNdbCjKC518aN5EzplZgUWScA5Qb+pQKr0uvnT+TD61LD2cz+uy4e7O6OVx2Jg9sYi7rl1MStWxDFCfq8LrpMJ76Ls7NqvlsMsLXXYKRcdKEARBGAVEB0sQBEHI4XYc2U+Dw3b4zp3PbR/3qaMFQRAEQSS5EARBEARBEARBGCbiDpYgCMIJJhCT0Q0zZxjfcOiKK8RkDc0wKXTa+g3pS8gaMVnDZrFQWigmnQuCIAjjk+hgCYIgjDHpTIAKhpkedtc3iUUwrhCXNZx9huypusGu9ijffGoLTV0JPr5wEndeWJ/JJGiaJm2RFE9vaKas0MlFc6so75P1zh+TSSrpTGXlWR0of0zmzsc2snJPuiDutPICHrn5NKqL0ynSAzGZX72yi+c2H6S2zMMvrlpIfWUhtu6sfKZp0hmVkTWDAqetX0rl9nCKv25sweuycvG8iTmvfTx1RFL8bVMLDquFSxZMOuw8saEKJdId1v/f3n3HuVXdeR//HNUZafq4jBvGGGNssMHYQAAnsCGFFsgCycISIGzaZhPSdzfJK7tPssk+uykPKbBJlg4hCSGkEEijmoRmG7AB4wLu3dOL6pWuzvOHZM3IMzZjrBmNNN/3Px7pztX9HZ07P+une+45fq+HCeEA3iFmMDyYmJPOLrxqoT7kJ3RAId0RSRJzXIJ+D5NqD++evd54ir5ECp/XQ1M4MOTMivLmtPYmiKeyM3we+LcXT6Xpjh28T49EYZ/68XsL779s60uSSLlUB7yD4to/M2nG2iHzUXtfkta+BE3hIE1hf8HsiK6boT3qkEi51Fb5D3sq9ba+JDEnTSgw+AsdkVJSgSUiUmZe2dXDVbcsJ53JcNPfn8I750/Of8jtijl87cFXeWD1bqbUV/HbT5zF5FyR1RV1uPKW5+iNZxcs/clz22mpr+Jjb5uNz+uhLZLk4puepq0vCcCja/dxw/tPzk+H3t6X5IN3rmDNrl5OntHArdcsYUJtkEzG8tBLu/PFFcCW9ijfeGgt3758IQGfl7ue3cpPl28HYM2uXq665Tn+/Jm35QvAfb0JLvmfp9nXm+TyxdP5yoXz8rMItvUluOzHz7CzKzuN+0Mv7+FHVy3OTz1fKm19Sa685Tk2tUUB+PWqXdxx3ak0DzGF9uGKJFLc9cxWvvvo69RV+fjdJ5dy9ITwsPa11vLCti6uu2MlFrj1miWcfdzE/KQinVGHL/7qFR5Zt4+jmkLc//Ezhl1kxZw09z2/g2/8fh3hgJfffvIs5kyqfbPNlAFaexO893+eZndPglNmNnDzB5YUfJHw4rZurr19BRa4+erFnDN3
El6POfgLDlPccbn/xZ38x4NrCQW8/PYTZ3Hc5P4+betL8oFbl7NhXx+nzWrkh1ctLiiyXt3dw5U3LyeVyXDjlYt49wmT8wVae1+Sa+9Ywau7e6nye3jo+qUcO+B82dWd4KIb/0pvIs3FJ03laxefMOy/6/1/fxtbI8yZVMPPPnI6Ew/zywKRkaKvnUREykjKzfCTZ7fhuBkyFu54egvRZDq/3UlleGD1bgD29CRYv7c3vy2STOeLq/0eXdtKXyL7XG88nS+uAB7f0Eoy7eYf9yXTrNmVfb3VO7qJ5I4bS6VZtqFtUKwrtnQSc1wiyRSPr28t2NYVSxF1+mNZs7uXfb3ZY9//wk6cdCa/LZp088UVwHObO0kO2F4q8ZSbL64AXt7ZQzJVnLhiKZe7n90GQG8izWMHvH9vFNddz2wlnbG4Gcudz2wllup/rxMpl0fW7QNge2eM7R2xYb92NOly5zNbsz87Lg+9tGfY+8qhbWmPsrsnAWSLqViq/28v7gzRp076YC91WCLJNHfl+jTmuDz40u6C7d0xhw37+gBYsaWLmNMfV9rNcHcuH1kLdzy9lWiyf3vMSfPq7mzOSKQyPLau8Dx+amMbvbn887uXduO4w//76Y45bGzNrr33emske8VWZIxQgSUiUkb8Xg/vOWlq/vEFC6YUDBXyeQ2nz2oCoDboK7i6EA76qPIXpv1TZzXlh/TUVvkKFrRdNKMRn7f/G/JwwMv0xuyQv6OaQoSC2W+pq/xeTpnZOCjW+VPrCPo9hAI+lsxsKtgWCngL4j6+pZZwbnHUt86ZUHDcUMBLY6h/9sH5U+rwe4/8m/sjVeX3MGnAFYZZE8JFGy4X9Hl55/zJAPi9hrNmNw8/Lp+Xixb2nyMXLZxC9YBhWQGfh4XT6wFoCPmZ0RQa9mtX+z28+4QWAHwew9uPnzTsfeXQZjaHqKvK/k3MmhCmesAyCVX+wr/7CxdMKdh+JKoDXt49P9un3iH6tL7aT0vuSvPsiTUFx/V5PVx80lT2L0t3wYKWgr/rqgE5w2PgzAPO41OPbsr/LZ8+qzDfvJH6an9+WOCk2qDWv5MxRQsNi0hRlcOin1De+aYvkaI7liLlZmgOB6g/YCHg9kiSjohDQ8hPcziQv88pkXJZsaWTz923mvaIw9nHTeA77zs5/yEl5WbY3hHjpic2MqEmyIffOis/vHC/1r4EnVGHpnCgYFhZW1+CK25ezqa27DfKDSE/v/r4mcyeWJPf/tlfrOapjR1MrA3yvb87mSUzG/NrbaXcDJ1Rh+5YiuaaQMEQJDdj2dEV46bHN1IT9PGPZ8+mpb70Q4EyGcuu7jg3Pb4Rv8/wT+ccy9TcPWfF0Bl1aI8kqa3y0RgKUHUYH6h74ym6Yw4WaAwFqDtgevz2viQd0SSNoQDNNcHDGmrWlYsrHPTRUOR7gd6Mcsg5w8k36dz9SHt74kxrqB403K03kaI7mr3XqSk8uE+PRFfUoSOaJBTw0VDtJzTgi5b990d2xVI0hf2D4hqYj5rCgUELk+/rTfDSjm5mT6yhpb6q4B6tRMqlM+rQ1pdkWmP1oPu7DmV/XLu6979fwUELkIsU23DzjQosESmqcviwA+M336RzhUzGZr8VP/DDEEAy5eLxmMO+GtMeSbKzK07ccZk9MTzog3t3zCGRyuAx0HyYkzYAOGkXg8HvG1uDL5x0BgNjLq7xohxyznjNNyKVZrj5RpNciIiMIz7voRcDBvJXlQ7XhJrgIb+BHqqYOxwDZx8bSwIqrEREZAD9ryAiIiIiIlIkKrBERERERESKRAWWiIiIiIhIkegeLBEREZExIpl2CY6x+w2jyTRxJ00o6Bty1sj2SBJrLU2hwZPXdESSxFMufq+HpnCgYPKc/ZPupDOWUMBbcJ/m/lkCV+/oprbKx5zJtYPu8exLpIinXKr9XmoPmKbdzVg6o0mshdpqH9X+wriddIaeeIqAz0P9EDMyJlIuyZRLTdA35IQ8ccfF42HI
vspkLLFUmmqfd8h9exMp0q6lodqfXwC8MDYXn8cz5DZrLY6bOeg5knKzk+74DjKJ0EieX2Px3C0VFVgiIiIiJdYTT/Hkhjb+uGYPH1o6iwXT68fEh9XOqMNNj7/Osg1tXLJoGteeMbOgENrcFuGffvoiiZTLd//uZE6cVp8votr7knzk7udZtaOb+mo/P/3w6Zw4rT6/74Z9fVxz2wo6og7vnDeZ/75sAc25Impfb5KLbvwr7REHgJNn1HPrtafmi6zOqMMND2/g0XWtvGPeJD73rrk0hbNxZTKWDXt7+eAdK4km0/zXpQt5x/xJ+eIwkkyzbH0r3330NeZMquUbf3tiQfHWGU3yv09uZtWObq5/+7EsObqxoEDb3R3n6w+tJRz08a/nzS2Yuj6STPPMxnZ+tnw7F500hXfNbymYUr+1N8GXf/MK7RGHr19yIvOn1OaLMGstW9qj3PDIa5wwtY4rTj2KxnD/e90dc3h47T6WbWjlY2+bzbwptQWT/7T2Jbjh4dfwew2fOve4/BIckC0YX9nZw+1Pb+GCBVM4+7iJRZvqf6yeu6WkIYIiIiIiJdYZcfjUvav445q9XHXrcrqiqVKHBMArO7u5/emtbG6P8t1HXmN3dzy/rSOS5FP3rmL93j62dsS47s6VdMWc/PanNrazakc3kP0Q/u8PvEpXNLu9K+rwld+soSP3+JF1+3i9NZLf94HVu/LFFcDqHT3sGXDsV3Z2c8/y7eztTXDP8u2s2dXTH1fU4ZM/W0VrX5Ko4/KZX6yiN97/fvbGU3zq3lVsaovyp1f38pNntxW0+bnNnfzvXzazYksn/3DnSnpi/ft2xxw+f99L/HHNXu5/YSc3PPwaKTdT8Nofu+cFlr3Wxhd++XK+fZC9uvTtP2eLwtU7urnm9uV0DHi/2iMO19y+gode3sM3/7SBlVs7C+La25PgX+5/mT+8spcrbn6OrgFxxR2Xbzy0lntX7uAnz23ne48UxtUdS3HVrcv545q9XP/zVXQOiOtIjdVzt5RUYImIiIiUmGv7PwynMxbL2FinNJUpjCM94LEF0m7/47RrGRj2wA/42X0z+c0Zawte68DfP3DfA499YFwH/n4q0//YAgOXfbUWBu7upN2CfZ10/75uxhb8rrXgDDhW8oDjZqwtOJY7MA5rSaYHtrHw/bLYgnY4g96//l92D2i/xeIM6Iuk6xa2GYs74IkD3/sjceC5yxg5d0tJBZaIiIhIiU2oCfJ/3jOfM2Y3c+s1S2ioPrJ144rllKMaufikqTSG/Fx7xkymN4by25pCAW54/0m01FVRX+3nxx84hYZQ/7Czs+dOZPbEMABBn4d/v2h+fhhfc02Qr148n1AgO5Rs8cxG5k2py+976SnTqavqH5Y3Z1INM5r6j71oRgPnn9hC0OfhghNbWHRUQ35bY8jPd99/MqGAF2Pgq+85gdoBr1VX7eM/Lj6B5nCA02Y1ct1Zswra/NY5E7hs8XTmTKrhB1cuKrhHqzGcbfMZxzRz7rxJ/Ot5xxfcV1Zf7eebly3gpOn1fOn84wuGHgZ8Xr50wfEsntnIzOYQt127pOD9ag4HufO60zjr2GY+tHQWZ86eUBDXtMZqvnje8Zwxu5k7rzuVhgFxhQI+vvqeE3jX/MlcsKCFf3738QVr9DVUB7jlmiWcMbuZr75nPhNqind+HXju1o+Rc7eUjLVjp8rUSuci5W+4q5yXmvKNSGUoh5wz3HzjpDMkUi7hwNCTI5RKT9whmcpQ5fcOum9n4GQSDSH/oAXB2yNJIok01X4v9SE/VQMWMk+mXLrjKZx0hlDAm7//CsB1M7RGkjyxvpW6aj+nz2ouuKcIssP1HDdDwOsZtJB5ynXpiqawQDjgo6aqcNqBmJMmkkzj8xiawoMXSI8kUiTSGWqDviEXX++OOXg9ZtDkGpDtx0gyTTjgHXLfzqiDm8nQECqc9AOyV7n6EmkCPk/BezXwtRNpl3DAh3eISTCiyTTGMORk
JK6bIeq4VPm9RV8gfayeu8U23HyjSS5ERERExoCAz1P0D77FUF8dgOqht3k9pmCShwNNqAkOmv1vv6Dfy+QhiggAr9fDlPpq/v70mQd97QOLqoH8Xi+T6g4+0UIoMPSMiPvVVPmpOejWQx874PPQ5Dv49qbwwbcZYw45+cQbnSPh4MHb5PV6qKsemfNrrJ67paJ3QkREREREpEhUYImIiIiIiBSJhgiKiIiISEmkMxlSaUt1oHA4X9xx6U2k2NEZI+jzMrk+SOMQ9yyJjEUqsERERETkiFhrMWbwpAvtkSSJlIvf66ExFMjfp+OkXVp7k9z1zFa2d8W5cEELZx07geaaID0xh/ue38l3Ht6Qn9a8KRzgB1cuYvHMhvyiv8m0SySRpqbKN+TCtpFkGjdjC2YBFBkNKrBERERExrHumENfIp0rgvwFM98lUi77ehO8uruXhdPrmVQXJODt396aW+g3lkzzD0tnMaW+Kl9o7eiM8aG7VvLavgh1VT6+eflCzp4zkVDQx9b2GO+56al8AfXnV/dy5uxmfnTVKazc2sV//mFdQYydUYcP3r6Cxz9/Dkc1+2iPJLnlL5t58rU2zj5uIh952zH5yTQyGcv2zhj//cd19CbSfO6dxzF/Sh2hARNAtEeS7OtN0BQO0BQKFLS5L5FiT3eCTW0RFh3VyOS6YEHx2BFJ8tLOHqr8Hua11NE4YNKKTMbSFkmyvSPGjKYQE2oC+AZcdeuNp2iPJOmJp5jZHBo0g2Fn1KE75hAK+GgKD56V8VB64ym64ykM2Rkdh5rh8M3qijn0xlMEfd7DjiuaTNMTz87oWF/toyY4/LhSbobOqEMi5VJX5S94r+HQ524pqcASERERGadiyTQ/Xb6db/95A0Gfh9/801nMn9q/HtXOrjjnf/8vpFxLKODl0c+dzdSG7JSCHZEk/3DXStbs6gXgdy/t5qFPLWVSbRXdMYd/+dXLvLYvAkBvIs31P1vFX//1b0hlMnz992sLFt0FeGZTBx1Rh+8++tqQsaYzlrue2cLn3jWXHz+5iVv/ugWA9Xv7yFj4wruOI+j30h5N8r4fP0tbJAnA8i3PsewL5+QLrI5Iko/f8wIrt3YR8Hp48PqlzG2pzR9nza4errxlOQCTaoM8dP1SJtVlZ0rsijn88/0v8/j6VgA++rZj+PS5c/Kz97VHklz0g6doiySpDfr482ffln+/rLX89fU2PvGzVQCcf2IL//fSBTTmZiTsjjl87cFXeWD1boI+Dw9dv5Q5k/vjOhQnneH3r+zhS79+BYAfXLGICxe0FGXK9L5Eipuf3MSPntxMtd/Lg9efxbGThheXtZYVWzr50F0rscBNVy7ivBOGH9eurjgX3fgUkWSaa8+YyefeNTd/RTKWTHPPc9v4zsOvDXnulpIGsoqIiIiMU1HH5b7ndwCQTGf4/Su7C7av2NJBys2umRpzXNbv7c1vc9KZfHEF0NqXJJZ0s9vcDC9u6yp4rXTGsrcnQTKV4dXdvQzFcTNsaoscNN51e/vojadYtqGt4PllG1rpS6aB7P1b+4sryK7VtXZP//GS6Qwrt3blj/fEhtaC13ro5T0FbepNpPrb4FqWDfj9P7+6l5jj5h/v7U3kj92XTBe8X4mUy29X97+/D6/dR8rtLzJTruXBl3bnY9xfxA1H1Enzy1w/Atz3/A5iKfcQewxfPOVy/wu78j8/tm74cSVSGX6+YjsZC9bCvSsPL65nN3UQyfXrr17cRXLAvtlzdycw9LlbSiqwRERERMapUMDLe0+eBkDA6+H8E6cUbD/16CZ8uQVtq/we5k7uv0Lg93mYP6X/8YSaAKHcZBUBr4eTZzQUvJbXY2ipqyLg83B8y9BXQAJeD7MmhA8a79zJtdRV+Vh67ISC5986ZwI1uatI1QEvzQOGknkMzBsQZ8DnYdFR2dj8XsPZx00seK3zTmzJ/zyxJkjdgKF2Po9h6Zz+3z/3+En5NgO01FXl17kKB7zMbek/
bpXfy0UL+9/fc46bWDBph99rOO+E7LGDPg/nzC2M61DCAR/vXTQt//jSU6YRKtJwuWq/l0tOnpqP62+OnzTsfav8Ht63ZDr7R1hevnj6YcV1+jFN+ff3PSdNLVh8ORTw8reLDn7ulpKx1pY6hrzhrnQuImPXcFc5LzXlG5HKUA45Z6znm+6YQ088RdDnoSEUKPgQG3fS7OlJ8NLObhYf1UhLfVXB/Tf7ehPc+fRWok6aj771GKY2VOPJFWTbO2J88I4VbG6PEg54+a9LF3LuvEmEgz7W7+nl4puexhlwBee0oxu55ZolPLu5g3+858VBcXo9hsc+fzZHN4dpjyS58bHXeWpjO0uPncD1587J34PlZizbOqJ84/fr6Euk+Nw7j2Ph9IaCRXjbI0l2dcWZWBukKVzY5r5Eiu2dMV7fF+G0WU0F95VBdojhiq2dhPxeFkxvKFg42M1Y2vqSbGqLcMyEMBNqgwVFVE88xb7eBN0xh2Mn1Qy6B6sjkqQj6mTvNzrMe4p64im6og4eY2gI+Q+5YPHh6opmz5Eq/+Hfg9WXSNEbT2PJTjhyOPeGpVyXzmiKuONSV+0b9H4d6twdCcPNNyqwRKSoyuHDDijfiFSKcsg5lZ5vMhlLBovPM3hgVFtfdhbBgM9DfbU//wE4mXbZ25Pgtqe2sKMzzoULWzhn7iQm1ATpjjn8dPk2vv/oxnwBVlft4wdXLOK0WU2EAtlCKZFy6Uukqa3yDfnBui+RIp2x+XucRI7UcPONJrkQERERkTfN4zF4GDxFO8DE2uCQzwd9XmY2h/m3C+fjuJmCq0sNoQAfPHMWly2ewZa2CFV+L9Maq2moLrxyUuX3HvKKRTFn0RM5HCNWYBljZgB3A5MBC9xsrf3+SB1PRMYv5RsRGU3KOcXj93nw+wZf+QoHfYSDPlpys/eJlJORvIKVBj5vrX3RGFMLvGCMecRau3YEjyki45PyjYiMJuUcETmoEZtF0Fq7x1r7Yu7nPmAdMO3Qe4mIHD7lGxEZTco5InIoozJNuzHmaGARsHyIbR81xjxvjHm+ra3twM0iIodF+UZERtPBcs54yjeJlEvcSZc6jMPiZiw9MYd4kdaKOhxO2iWaLK/3CyCaTOMcsDi0DG3ECyxjTA3wK+Az1tpBq8pZa2+21i6x1i6ZOHH48/2LiBxI+UZERtOhcs54yTdtfUm+8ts1fPa+l9jTEy91OMPipF1W7+jiIz95ge/8aT2d0eQb71QkHZEk3/j9Oj597yq2dURH7bhHwlrL5rYI1/98Fd/603o6I6P3fpWrEZ1F0BjjJ5t4fmqt/fVIHktExjflGxEZTco52enZf7hsI/e/sBPIXsm68cpFY372vu5Yig/cuoJ4ymXFlk5OmdnEhQtHZ5Ha36zaxd3PbgNgX2+SO687leaaoWdaHCvaIw4fuft5NrVFeRw4rqWW9y+ZUeqwxrQRu4Jlsiuy3Qass9beMFLHERFRvhGR0aSc088zYAHegT+PdQND9Yxi2IXv1+gd90gYKFhouVziLqWRvIJ1FnA18IoxZnXuuS9ba/8wgscUkfFJ+UZERpNyDtn1rz5+9mziqTSRpMuXz5835q9eATSG/fzsw6fz7Yc3sHBaA6cf0zRqx77k5Kns6UmwsyvGly+YN+avXgFMqA1y6zVL+K8/rmf2xDB/M3dSqUMa84y1ttQx5FX6Suci48FwVzkvNeUbkcpQDjmn0vNNys2QsZag7+CL/o411lr6Emmq/J6CxYtHQ8rN4GbsIRdJHovijovfa/B5R2WOvDFpuPlmRO/BEhEREZHK5i/DD9zGGOqqS3O1ze/1UGa1FQDVgTIMukTK7y9CRERERERkjFKBJSIiIiIiUiQqsERERERERIpE92DJiIgkU0QSadwMVAc8NIUHz5ITSaYJeD0EfKrzRURERKQyqMCSg+qJOXTFU/g9HhpCPsLB4d0M2hVzuOUvm7n5L5tJZyxnzG7mxisWMaE2W2RFEmle
3d3Dj5/czHEtNXx46TFMrB3705SKiIiIiLwRXTqQISVTLve/uJNzvr2Mt37rcVbv6Bn2vjs7Y/xw2SbSmewSAM9u6uCe5dtIuRkA9vUluOKW53hiQyv/++Rm/v2BNfQmUiPSDhERERGR0aQCS4YUdVweWL0bgIyF367eRSYzvDXTVu3oHvTcc5s7iDkuAKu3dzNw+bVnNnWQyG0TERERESlnKrBkSDVBH1e/ZSYAAa+HK049Co/HDGvfU48evCL6OXMnEc6tn7Dk6Ea8A17r3OMnEQpqbQURERERKX+6B0uGFPB5OO/EFs6c3YzHY2gMBYa979T6av7tonn8v4dfI5FyuWDBFN63eHp+5e+JtUEe/ORSbntqM/Om1PG3i6ZRM8z7u0RERERExjIVWHJQtVV+aqsOv/CpD/m56vSZXLRwKtZCKOAtWC09FPAxf2od/33ZQnwegzHDuzImIiIiIjLWqcCSEVHl91LlP/SwP79XI1RFREREpLLoE66IiIiIiEiRqMASEREREREpEg0RFBGREdcbT9ETT/HKrh5a6qqY0RTSAuMiIlKRVGCJiMiI6oo5/GjZJm7+y+b8c0c1hbj3o29hakN1CSMTkZHmuhn6kulh3ZstUik0RFBEREbU1vZoQXEFsL0zxld++wq98VSJohKRkeakM6zc1sVH7n6eHzz2Ol1Rp9QhiYwKXcESEZER42Yy/HzF9iG3LdvQRjzlFizjICKVozvmcO3tK0imM6zc2sXZx03k9GOaSx2WyIjTFSwRERkxBkPQN/SwIK9Ha+CJVDQDQX//R81qDRGUcUIFloiIjBiPx3DVW44acttFC6YSDmoghUilag4F+OXHzuC9J0/jW5cv5KjmUKlDEhkVKrBERGRETa2v5j/feyJBX/9/OafNauRLFx5PjQoskYrl9XqY21LHd963kPcvmUFDKFDqkERGhf5nExGREVVX7efSU6bzjvmTaetLUlvlo7bKT1NYH7ZExgOfV9/ny/iiAktEREZcdcBLdcDL5LqqUociIiIyovSVgoiIiIiISJGowBIRERERESkSFVgiIiIiIiJFogJLRERERESkSFRgiYiIiIiIFIkKLBERERERkSJRgSUiIiIiIlIkKrBERERERESKRAWWiIiIiIhIkajAEhERERERKRIVWCIiIiIiIkWiAktERERERKRIVGCJiIiIiIgUiQosERERERkRKdelI5Ikmky/qf07ow698VSRo3pj0WSajkiSlOuO+rEPJZaLy0mPrbikkAosEbokRDsAAAiaSURBVBERESm6eCrNUxs7uPq2FXzzT+vpjDrD3tday6a2CB+6cyWf/sUqWnsTIxhpoc6owzf/uJ6rb1vB0xs7iDtvrjgstq6ow/cefZ2rb1vB4+tbiY2RuGQwFVgiIiIiUnR98TQfvft51u7p5e5nt7Fhb++w9+2KOXz+vpdYtaObJ9a3cdvTW0Yw0kLr9vRy93PbWLunl4/e/QK9ibFRyGzrjHLzXzezdk8vn/jZKnrjYyMuGUwFloiIiIgUn4HaKn/+YV21/xC/XMhrPDSE+n9/Yk2wqKEdSv2AOGuqfHjMqB36kGqCvvzP4YB3zMQlg/ne+FdERERERA7PhHCQ+//xDO58Zitnzm5memP1sPetD/n51mULufWpLUysDXLpKdNHMNJCM5qq+dFVp/Ds5g4+eObRNIdHr7g7lEm1Vdx27RKWbWjj6jNm0hwOlDokOQgVWCIiIiJSdB6P4ZiJNfzHJSe+qf0n1VXx5QvmFTmqN1ZfHeD8BVM4f8GUUT/2odRV+zl33mTOnTe51KHIG9AQQRERERERkSJRgSUiIiIiIlIkKrBERERERESKRAWWiIiIiIhIkajAEhERERERKRJjrS11DHnGmDZg2xCbJgDtoxzOaFMbK8d4aOeh2jjTWjtxNIN5M5RvKr6NMD7aqTaWQc4Z5/kGxkc7x0MbYXy084g/44ypAutgjDHPW2uXlDqOkaQ2
Vo7x0M5KbmMlt22/8dBGGB/tVBvLWyW3baDx0M7x0EYYH+0sRhs1RFBERERERKRIVGCJiIiIiIgUSbkUWDeXOoBRoDZWjvHQzkpuYyW3bb/x0EYYH+1UG8tbJbdtoPHQzvHQRhgf7TziNpbFPVgiIiIiIiLloFyuYImIiIiIiIx5KrBERERERESKZEwXWMaY84wxG4wxG40xXyx1PMVgjJlhjHnCGLPWGPOqMebTueebjDGPGGNez/3bWOpYi8EY4zXGrDLGPJR7PMsYszzXp78wxgRKHeORMMY0GGPuN8asN8asM8acUWl9aYz5bO5cXWOM+bkxpqrS+nE/5ZzyVun5BpRzKqwvlW/KmPJNZfTlSOWbMVtgGWO8wP8A5wPzgSuNMfNLG1VRpIHPW2vnA28BPpFr1xeBx6y1c4DHco8rwaeBdQMefxP4rrX2WKAL+FBJoiqe7wN/stYeD5xEtq0V05fGmGnAp4Al1toTAS9wBZXXj8o5ZXyeDlDp+QaUcyqiL5VvyvccHUD5psz7ciTzzZgtsIDTgI3W2s3WWge4F7ikxDEdMWvtHmvti7mf+8ierNPItu2u3K/dBby3NBEWjzFmOnAhcGvusQHeDtyf+5Wybqcxph54G3AbgLXWsdZ2U3l96QOqjTE+IATsoYL6cQDlnDJW6fkGlHOooL5E+aas+0/5pnL6khHKN2O5wJoG7BjweGfuuYphjDkaWAQsByZba/fkNu0FJpcorGL6HvAvQCb3uBnottamc4/LvU9nAW3AHblhArcaY8JUUF9aa3cB3wG2k006PcALVFY/7qecU94qPd+Ack4l9aXyTXlTvskq674cyXwzlgusimaMqQF+BXzGWts7cJvNzp1f1vPnG2MuAlqttS+UOpYR5ANOAX5krV0ERDngUnm592VubPUlZBPtVCAMnFfSoORNqeScM07yDSjnSJlQvqkIyjdHYCwXWLuAGQMeT889V/aMMX6yieen1tpf557eZ4yZkts+BWgtVXxFchZwsTFmK9mhD28nO5a3IXcZFsq/T3cCO621y3OP7yebjCqpL98BbLHWtllrU8CvyfZtJfXjfso55Ws85BtQzqmkvlS+KV/KN5XTlyOWb8ZygbUSmJObySNA9qaz35U4piOWG6d7G7DOWnvDgE2/A67N/Xwt8MBox1ZM1tovWWunW2uPJtt3j1trrwKeAC7P/VpZt9NauxfYYYyZm3vqXGAtldWX24G3GGNCuXN3fxsrph8HUM4pU+Mh34ByDhXUlyjflG3/Kd9UTl8ygvnGZK/ujU3GmAvIjnP1Ardba/+zxCEdMWPMUuCvwCv0j939MtkxyvcBRwHbgPdbaztLEmSRGWPOAb5grb3IGHMM2W98moBVwAestclSxnckjDEnk73JNQBsBq4j+8VFxfSlMeZrwN+RnR1qFfBhsuORK6Yf91POKd/zdL9KzjegnENl9aXyTZlTvin/vhypfDOmCywREREREZFyMpaHCIqIiIiIiJQVFVgiIiIiIiJFogJLRERERESkSFRgiYiIiIiIFIkKLBERERERkSJRgSVFZYx5rzHGGmOOL3UsIlL5lHNEZLQo38hwqcCSYrsSeCr3r4jISFPOEZHRonwjw6ICS4rGGFMDLAU+RHZ1c4wxHmPMD40x640xjxhj/mCMuTy3bbEx5kljzAvGmD8bY6aUMHwRKTPKOSIyWpRv5HCowJJiugT4k7X2NaDDGLMYuBQ4GpgPXA2cAWCM8QM3ApdbaxcDtwNlv4q9iIwq5RwRGS3KNzJsvlIHIBXlSuD7uZ/vzT32Ab+01maAvcaYJ3Lb5wInAo8YYwC8wJ7RDVdEypxyjoiMFuUbGTYVWFIUxpgm4O3AAmOMJZtMLPCbg+0CvGqtPWOUQhSRCqKcIyKjRflGDpeGCEqxXA78xFo701p7tLV2BrAF6AQuy41Tngyck/v9DcBEY0z+crox5oRSBC4iZUk5R0RGi/KNHBYVWFIsVzL4
m5xfAS3ATmAtcA/wItBjrXXIJqxvGmNeAlYDZ45euCJS5pRzRGS0KN/IYTHW2lLHIBXOGFNjrY0YY5qBFcBZ1tq9pY5LRCqTco6IjBblGxmK7sGS0fCQMaYBCABfV+IRkRGmnCMio0X5RgbRFSwREREREZEi0T1YIiIiIiIiRaICS0REREREpEhUYImIiIiIiBSJCiwREREREZEiUYElIiIiIiJSJP8fqAN/00HEcFwAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axs = hdf_fenced.stratify(['Pclass']).cols[['Age', 'logFare']].scatterplot(figsize=(12, 6))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.0" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: requirements.txt ================================================ numpy>=1.14 scikit-learn>=0.20.0 pandas>=0.24 matplotlib>=2.2.3 seaborn>=0.9 pyspark>=2.3 scipy>=1.0 findspark pyarrow>=0.8.0 ================================================ FILE: setup.cfg ================================================ [metadata] description-file = README.md ================================================ FILE: setup.py ================================================ from setuptools import setup, find_packages def readme(): with open('README.md') as f: return f.read() setup(name='handyspark', version='0.2.2a1', install_requires=['pyspark', 'matplotlib', 'numpy', 'scipy', 'seaborn', 'pandas', 'scikit-learn', 'findspark', 'pyarrow'], description='HandySpark - bringing pandas-like capabilities to Spark dataframes', long_description=readme(), long_description_content_type='text/markdown', url='https://github.com/dvgodoy/handyspark', author='Daniel Voigt Godoy', author_email='datagnosis@gmail.com', keywords=['spark', 'big data', 'data cleaning', 'visualization', 'exploratory data analysis', 'pandas'], license='MIT', classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'Intended Audience :: Education', 'Intended Audience :: 
Science/Research', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Artificial Intelligence', 'Topic :: Scientific/Engineering :: Visualization', 'Topic :: System :: Distributed Computing', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3' ], packages=find_packages(), zip_safe=False) ================================================ FILE: tests/handyspark/conftest.py ================================================ import findspark import os import pandas as pd import pytest from pyspark.sql import SparkSession from pyspark.ml.feature import VectorAssembler from pyspark.ml.classification import RandomForestClassifier FIXTURE_DIR = os.path.join(os.path.split(os.path.dirname(os.path.realpath(__file__)))[0], 'rawdata') findspark.init() spark = SparkSession.builder.getOrCreate() df = spark.read.csv(os.path.join(FIXTURE_DIR, 'train.csv'), header=True, inferSchema=True) dates = pd.DataFrame({'dates': pd.date_range('2012-01-01', '2015-12-31').values}) @pytest.fixture(scope='module') def sdf(): return df @pytest.fixture(scope='module') def sdates(): return spark.createDataFrame(dates) @pytest.fixture(scope='module') def pdf(): pdf = pd.read_csv(os.path.join(FIXTURE_DIR, 'train.csv')) return pdf @pytest.fixture(scope='module') def pdates(): return dates @pytest.fixture(scope='module') def predicted(): assem = VectorAssembler(inputCols=['Fare', 'Pclass', 'Age'], outputCol='features') feat_df = assem.transform(df.select('Fare', 'Pclass', 'Age', 'Survived').dropna()) rf = RandomForestClassifier(featuresCol='features', labelCol='Survived') model = rf.fit(feat_df) return model.transform(feat_df) ================================================ FILE: tests/handyspark/extensions/test_evaluation.py ================================================ import numpy as np import numpy.testing as npt import pandas as pd from handyspark import * from pyspark.ml.classification import RandomForestClassifier from pyspark.ml.feature import 
VectorAssembler from pyspark.ml.pipeline import Pipeline from pyspark.mllib.evaluation import BinaryClassificationMetrics from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_curve def test_confusion_matrix(sdf): assem = VectorAssembler(inputCols=['Fare', 'Pclass', 'Age'], outputCol='features') rf = RandomForestClassifier(featuresCol='features', labelCol='Survived', numTrees=20) pipeline = Pipeline(stages=[assem, rf]) model = pipeline.fit(sdf.fillna(0.0)) predictions = model.transform(sdf.fillna(0.0)).select('probability', 'Survived') bcm = BinaryClassificationMetrics(predictions, scoreCol='probability', labelCol='Survived') predictions = predictions.toHandy().to_metrics_RDD('probability', 'Survived') predictions = np.array(predictions.collect()) scm = bcm.confusionMatrix().toArray() pcm = confusion_matrix(predictions[:, 1], predictions[:, 0] > .5) npt.assert_array_almost_equal(scm, pcm) scm = bcm.confusionMatrix(.3).toArray() pcm = confusion_matrix(predictions[:, 1], predictions[:, 0] > .3) npt.assert_array_almost_equal(scm, pcm) def test_get_metrics_by_threshold(sdf): assem = VectorAssembler(inputCols=['Fare', 'Pclass', 'Age'], outputCol='features') rf = RandomForestClassifier(featuresCol='features', labelCol='Survived', numTrees=20, seed=13) pipeline = Pipeline(stages=[assem, rf]) model = pipeline.fit(sdf.fillna(0.0)) predictions = model.transform(sdf.fillna(0.0)).select('probability', 'Survived') bcm = BinaryClassificationMetrics(predictions, scoreCol='probability', labelCol='Survived') metrics = bcm.getMetricsByThreshold() predictions = predictions.toHandy().to_metrics_RDD('probability', 'Survived') predictions = np.array(predictions.collect()) pr = np.array(bcm.pr().collect()) idx = pr[:, 0].argmax() pr = pr[:idx + 1, :] precision, recall, thresholds = precision_recall_curve(predictions[:, 1], predictions[:, 0]) npt.assert_array_almost_equal(precision, pr[:, 1][::-1]) npt.assert_array_almost_equal(recall, pr[:, 0][::-1]) roc = 
np.array(bcm.roc().collect()) idx = roc[:, 1].argmax() roc = roc[:idx + 1, :] sroc = pd.DataFrame(np.round(roc, 6), columns=['fpr', 'tpr']) sroc = sroc.groupby('fpr').agg({'tpr': [np.min, np.max]}) fpr, tpr, thresholds = roc_curve(predictions[:, 1], predictions[:, 0]) idx = tpr.argmax() proc = pd.DataFrame({'fpr': np.round(fpr[:idx + 1], 6), 'tpr': np.round(tpr[:idx + 1], 6)}) proc = proc.groupby('fpr').agg({'tpr': [np.min, np.max]}) sroc = sroc.join(proc, how='inner', rsuffix='sk') npt.assert_array_almost_equal(sroc.iloc[:, 0], proc.iloc[:, 0]) npt.assert_array_almost_equal(sroc.iloc[:, 1], proc.iloc[:, 1]) ================================================ FILE: tests/handyspark/extensions/test_types.py ================================================ from handyspark import * import numpy.testing as npt from pyspark.sql.types import IntegerType, StringType, ArrayType, MapType def test_atomic_types(): npt.assert_equal(IntegerType.ret('')[1], 'integer') npt.assert_equal(StringType.ret('')[1], 'string') def test_composite_types(): npt.assert_equal(ArrayType(IntegerType()).ret('')[1], 'array') npt.assert_equal(MapType(StringType(), IntegerType()).ret('')[1], 'map') ================================================ FILE: tests/handyspark/ml/test_base.py ================================================ import numpy as np import numpy.testing as npt import handyspark from operator import itemgetter from sklearn.preprocessing import Imputer def test_imputer(sdf, pdf): hdf = sdf.toHandy() hdf_filled = hdf.stratify(['Pclass']).fill(continuous=['Age']) himputer = hdf_filled.transformers.imputer() sdf_filled = himputer.transform(sdf) sage = sdf_filled.sort('PassengerId').toHandy().cols['Age'][:].values pdf_filled = [] for pclass in [1, 2, 3]: filtered = pdf.query('Pclass == {}'.format(pclass))[['PassengerId', 'Age']] imputer = Imputer(strategy='mean').fit(filtered) pdf_filled.append(imputer.transform(filtered)) pdf_filled = sorted(np.concatenate(pdf_filled, axis=0), 
key=itemgetter(0)) age = list(map(itemgetter(1), pdf_filled)) npt.assert_array_equal(sage, age) def test_fencer(sdf, pdf): hdf = sdf.toHandy() hdf_fenced = hdf.stratify(['Pclass']).fence('Fare') hfencer = hdf_fenced.transformers.fencer() sdf_fenced = hfencer.transform(sdf) sfare = sdf_fenced.sort('PassengerId').toHandy().cols['Fare'][:].values fences = hfencer.fences pdf_fenced = [] for pclass in [1, 2, 3]: filtered = pdf.query('Pclass == {}'.format(pclass))[['PassengerId', 'Fare']] lower, upper = fences['Fare']['Pclass == "{}"'.format(pclass)] filtered['Fare'] = filtered['Fare'].clip(lower=lower, upper=upper) pdf_fenced.append(filtered) pdf_fenced = sorted(np.concatenate(pdf_fenced, axis=0), key=itemgetter(0)) fare = list(map(itemgetter(1), pdf_fenced)) npt.assert_array_equal(sfare, fare) ================================================ FILE: tests/handyspark/sql/test_dataframe.py ================================================ import numpy as np import numpy.testing as npt from handyspark import * import pandas as pd from pyspark.sql import DataFrame, functions as F from sklearn.preprocessing import Imputer, KBinsDiscretizer from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA from sklearn.pipeline import make_pipeline from sklearn.metrics import mutual_info_score from scipy.spatial import distance from scipy import stats def test_to_from_handy(sdf): hdf = sdf.toHandy() sdf = hdf.notHandy() npt.assert_equal(type(hdf), HandyFrame) npt.assert_equal(type(sdf), DataFrame) def test_shape(sdf): npt.assert_equal(sdf.toHandy().shape, (891, 12)) def test_response(sdf): hdf = sdf.toHandy() hdf = hdf.set_response('Survived') npt.assert_equal(hdf.is_classification, True) npt.assert_equal(hdf.nclasses, 2) npt.assert_array_equal(hdf.classes, [0, 1]) npt.assert_equal(hdf.response, 'Survived') def test_safety_limit(sdf): hdf = sdf.toHandy() # maximum 10 elements returned hdf.set_safety_limit(10) res = hdf.collect() npt.assert_equal(len(res), 
10) npt.assert_equal(hdf._safety, True) # deliberately turn safety off -> get everything res = hdf.safety_off().collect() npt.assert_equal(hdf._safety, True) npt.assert_equal(len(res), 891) # safety should kick back in res = hdf.collect() npt.assert_equal(len(res), 10) # safety limit does not affect TAKE npt.assert_equal(len(hdf.take(20)), 20) npt.assert_equal(hdf._safety_limit, 10) def test_safety_limit2(sdf): hdf = sdf.toHandy() # maximum 10 elements returned hdf.set_safety_limit(10) res = hdf.cols[:][:] npt.assert_equal(len(res), 10) npt.assert_equal(hdf._safety, True) # deliberately turn safety off -> get everything res = hdf.safety_off().cols[:][:] npt.assert_equal(hdf._safety, True) npt.assert_equal(len(res), 891) # safety should kick back in res = hdf.cols[:][:] npt.assert_equal(len(res), 10) def test_values(sdf, pdf): hdf = sdf.toHandy() hvalues = hdf.limit(10).values values = pdf[:10].replace(to_replace=[np.nan], value=[None]).values npt.assert_array_equal(hvalues, values) def test_stages(sdf): hdf = sdf.toHandy() npt.assert_equal(hdf.stages, 1) npt.assert_equal(hdf.groupby('Pclass').agg(F.sum('Fare')).stages, 2) npt.assert_equal(hdf.repartition(2).groupby('Pclass').agg(F.sum('Fare')).stages, 3) def test_value_counts(sdf, pdf): hdf = sdf.toHandy() hcounts = hdf.cols['Embarked'].value_counts(dropna=True) counts = pdf['Embarked'].value_counts().sort_index() npt.assert_array_equal(hcounts, counts) def test_column_values(sdf, pdf): hdf = sdf.toHandy() npt.assert_array_equal(hdf.cols['Fare'][:20], pdf['Fare'][:20]) npt.assert_array_equal(hdf.cols['Fare'][:10], pdf['Fare'][:10]) def test_dataframe_values(sdf, pdf): hdf = sdf.toHandy() npt.assert_array_equal(hdf.cols[['Fare', 'Age']][:20], pdf[['Fare', 'Age']][:20]) npt.assert_array_equal(hdf.cols[['Fare', 'Age']][:10], pdf[['Fare', 'Age']][:10]) def test_isnull(sdf, pdf): hdf = sdf.toHandy() hmissing = hdf.isnull() hratio = hdf.isnull(ratio=True) missing = pdf.isnull().sum() ratio = missing / 891. 
npt.assert_array_equal(hmissing, missing) npt.assert_array_almost_equal(hratio, ratio) def test_nunique(sdf, pdf): hdf = sdf.toHandy() hnunique = hdf.nunique() nunique = pdf.nunique() approx_error = np.array([-1, 0, 0, 59, 0, -2, 0, 0, 9, -12, 2, 0]) npt.assert_array_equal(hnunique, nunique + approx_error) def test_columns_nunique(sdf, pdf): hdf = sdf.toHandy() hnunique = hdf.cols[['Pclass', 'Embarked']].nunique().squeeze() nunique = pdf[['Pclass', 'Embarked']].nunique() npt.assert_array_equal(hnunique, nunique) def test_outliers(sdf, pdf): hdf = sdf.toHandy() houtliers = hdf.outliers(ratio=True) outliers = [] for colname in hdf.cols.numerical: #q1, q3 = hdf._get_summary(colname, '25%')[0], hdf._get_summary(colname, '75%')[0] q1, q3 = hdf.cols[colname].q1()[0], hdf.cols[colname].q3()[0] iqr = q3 - q1 lfence = q1 - (1.5 * iqr) ufence = q3 + (1.5 * iqr) outliers.append((~pdf[colname].dropna().between(lfence, ufence)).sum()) outliers = pd.Series(outliers, hdf.cols.numerical) / 891. npt.assert_array_almost_equal(houtliers, outliers) def test_mean(sdf, pdf): hdf = sdf.toHandy() hmean = hdf.cols['continuous'].mean() mean = pdf[hdf.cols.continuous].mean() npt.assert_array_almost_equal(hmean, mean) def test_stratified_mean(sdf, pdf): hdf = sdf.toHandy() hmean = hdf.stratify(['Pclass']).cols['continuous'].mean() mean = pdf.groupby(['Pclass'])[hdf.cols.continuous].mean() npt.assert_array_almost_equal(hmean, mean) def test_mode(sdf, pdf): hdf = sdf.toHandy() hmode = hdf.cols['Embarked'].mode() mode = pdf['Embarked'].mode() npt.assert_array_equal(hmode, mode) hmode = hdf.cols[['Embarked', 'Pclass']].mode() mode = pdf[['Embarked', 'Pclass']].mode() npt.assert_array_equal(hmode, mode.iloc[0]) hmode = hdf.stratify(['Pclass']).cols['Embarked'].mode() npt.assert_array_equal(hmode, ['S', 'S', 'S']) def test_median(sdf, pdf): hdf = sdf.toHandy() hmedian = hdf.cols['Fare'].median(precision=.0001) median = pdf['Fare'].median() npt.assert_array_equal(hmedian, median) hmedian = 
hdf.cols[['Fare', 'Pclass']].median(precision=.0001) median = pdf[['Fare', 'Pclass']].median() npt.assert_array_equal(hmedian, median) hmedian = hdf.stratify(['Pclass']).cols['Fare'].median(precision=.0001) median = pdf.groupby(['Pclass'])['Fare'].median() approx_error = np.array([-.8875, -.25, 0.]) npt.assert_array_almost_equal(hmedian, median + approx_error, decimal=4) def test_types(sdf): hdf = sdf.toHandy() hdf2 = hdf.withColumn('newcol', F.lit(1.0)) npt.assert_array_equal(['PassengerId', 'Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare'], hdf.cols.numerical) npt.assert_array_equal(['Age', 'Fare'], hdf.cols.continuous) npt.assert_array_equal(['Age', 'Fare', 'newcol'], hdf2.cols.continuous) npt.assert_array_equal(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'SibSp', 'Parch', 'Ticket', 'Cabin', 'Embarked'], hdf.cols.categorical) def test_fill_categorical(sdf): hdf = sdf.toHandy() hdf_filled = hdf.fill(categorical=['Embarked']) hcounts = hdf_filled.cols['Embarked'].value_counts().loc['S'] npt.assert_equal(hcounts, 646) def test_fill_continuous(sdf, pdf): hdf = sdf.toHandy() hdf_filled = hdf.fill(continuous=['Age'], strategy='mean') hage = hdf_filled.cols['Age'][:].values imputer = Imputer(strategy='mean').fit(pdf[['Age']]) pdf_filled = imputer.transform(pdf[['Age']]) age = pdf_filled.ravel() npt.assert_array_equal(hage, age) npt.assert_array_equal(hdf_filled.statistics_['Age'], imputer.statistics_[0]) def test_sequential_fill(sdf): hdf = sdf.toHandy() hdf_filled = hdf.stratify(['Pclass']).fill(continuous=['Age']) hdf_filled = hdf_filled.fill(categorical=['Embarked']) npt.assert_array_equal(sorted(hdf_filled.statistics_.keys()), ['Age', 'Embarked']) npt.assert_array_equal(sorted(hdf_filled.statistics_['Age'].keys()), ['Pclass == "1"', 'Pclass == "2"', 'Pclass == "3"']) def test_corr(sdf, pdf): hdf = sdf.toHandy() hcorr = hdf.cols[['Fare', 'Age']].corr() corr = pdf[['Fare', 'Age']].corr() npt.assert_array_almost_equal(hcorr, corr) def 
test_stratified_corr(sdf, pdf): hdf = sdf.toHandy() hcorr = hdf.dropna().stratify(['Pclass']).cols[:].corr() corr = pdf.dropna()[sorted(pdf.columns)].groupby(['Pclass']).corr() npt.assert_array_almost_equal(hcorr, corr) def test_fence(sdf, pdf): hdf = sdf.toHandy() q1, q3 = hdf.approxQuantile(col='Fare', probabilities=[.25, .75], relativeError=0.01) hdf_fenced = hdf.fence('Fare') fare = pdf['Fare'] iqr = q3 - q1 lfence, ufence = q1 - (1.5 * iqr), q3 + (1.5 * iqr) fare = fare.mask(fare > ufence, ufence).mask(fare < lfence, lfence) npt.assert_array_almost_equal(hdf_fenced.cols['Fare'][:], fare) npt.assert_equal(hdf_fenced.fences_['Fare'], [lfence, ufence]) def test_stratified_fence(sdf): hdf = sdf.toHandy() hdf_fenced = hdf.stratify(['Sex']).fence('Age') npt.assert_equal(hdf_fenced.fences_['Age'], {'Sex == "female"': [-9.0, 63.0], 'Sex == "male"': [-6.0, 66.0]}) def test_grouped_column_values(sdf, pdf): hdf = sdf.toHandy() hmean = hdf.groupby('Pclass').agg(F.mean('Age').alias('Age')).cols['Age'][:] mean = pdf.groupby('Pclass').agg({'Age': np.mean})['Age'] npt.assert_array_equal(hmean, mean) def test_bucket(sdf, pdf): bucket = Bucket('Age', bins=3) sbuckets = bucket._get_buckets(sdf.fillna(0.0))[1:-1] kbins = KBinsDiscretizer(n_bins=3, strategy='uniform') kbins.fit(pdf[['Age']].fillna(0.0)) pbuckets = kbins.bin_edges_[0] npt.assert_almost_equal(sbuckets, pbuckets) def test_quantile(sdf, pdf): bucket = Quantile('Age', bins=3) sbuckets = bucket._get_buckets(sdf.fillna(0.0))[1:-1] kbins = KBinsDiscretizer(n_bins=3, strategy='quantile') kbins.fit(pdf[['Age']].fillna(0.0)) pbuckets = kbins.bin_edges_[0] npt.assert_almost_equal(sbuckets, pbuckets) def test_stratify_length(sdf, pdf): # matches lengths only hdf = sdf.toHandy() sfare = hdf.stratify(['Pclass']).cols['Fare'].mode() pfare = pdf.groupby('Pclass').agg({'Fare': lambda v: stats.mode(v)[0]})['Fare'] npt.assert_array_almost_equal(sfare, pfare) def test_stratify_list(sdf, pdf): # list hdf = sdf.toHandy() sname = 
hdf.stratify(['Pclass']).take(1) sname = np.array(list(map(lambda row: row.Name, sname)), dtype=np.object) pname = pdf.groupby('Pclass')['Name'].first() npt.assert_equal(sname, pname) def test_stratify_pandas_df(sdf, pdf): # pd.DataFrame hdf = sdf.toHandy() scorr = hdf.stratify(['Pclass']).cols[['Fare', 'Age']].corr() pcorr = pdf.groupby('Pclass')[['Fare', 'Age']].corr() npt.assert_array_almost_equal(scorr.values, pcorr.values) def test_stratify_pandas_series(sdf, pdf): # pd.col hdf = sdf.toHandy() scounts = hdf.stratify(['Pclass']).cols['Embarked'].value_counts(dropna=True).sort_index() pcounts = pdf.groupby('Pclass')['Embarked'].value_counts().sort_index() npt.assert_array_almost_equal(scounts, pcounts) def test_stratify_spark_df(sdf, pdf): # pd.col hdf = sdf.toHandy() sfirst = hdf.dropna().stratify(['Pclass']).limit(1).drop('Pclass').toPandas() pfirst = pdf.dropna().groupby('Pclass').first().reset_index(drop=True) npt.assert_array_equal(sfirst, pfirst) def test_stratify_fill(sdf, pdf): hdf = sdf.toHandy() hdf_filled = hdf.stratify(['Pclass']).fill(continuous=['Age'], categorical=['Embarked']) hage = hdf_filled.orderBy('Pclass').cols['Age'][:].values hembarked = hdf_filled.orderBy('PassengerId').cols['Embarked'][:].values pdf_filled = [] statistics = {'Age': {}} for pclass in [1, 2, 3]: filtered = pdf.query('Pclass == {}'.format(pclass))[['Age']] imputer = Imputer(strategy='mean').fit(filtered) pdf_filled.append(imputer.transform(filtered)) statistics['Age'].update({'Pclass == "{}"'.format(pclass): imputer.statistics_[0]}) pdf_filled = np.concatenate(pdf_filled, axis=0) age = pdf_filled.ravel() npt.assert_array_equal(hage, age) npt.assert_array_equal(hembarked, pdf.fillna({'Embarked': 'S'}).sort_values(by='PassengerId')['Embarked'].values) npt.assert_array_equal(sorted(list(hdf_filled.statistics_['Age'])), sorted(list(statistics['Age']))) def test_repr(sdf): hdf = sdf.toHandy() repr = str(hdf.cols['Fare']) npt.assert_equal(repr, "HandyColumns[Fare]") def 
test_stratify_bucket(sdf): hdf = sdf.toHandy() hres = hdf.stratify(['Pclass', Bucket('Age', 3)]).cols['Embarked'].mode() npt.assert_equal(hres.values.ravel(), np.array(['S'] * 9)) hdf = sdf.toHandy() hres = hdf.stratify(['Pclass', Bucket('Age', 3)]).cols['Embarked'].value_counts().sort_index() npt.assert_equal(hres.values.ravel(), np.array([21, 23, 40, 2, 68, 13, 17, 8, 59, 7, 1, 86, 1, 11, 28, 14, 166, 13, 8, 119, 2, 5])) def test_stratified_nunique(sdf, pdf): hdf = sdf.toHandy() hnunique = hdf.stratify(['Pclass']).cols['Cabin'].nunique() nunique = pdf.groupby(['Pclass'])['Cabin'].nunique() npt.assert_array_equal(hnunique, nunique) def test_mahalanobis(sdf, pdf): colnames = ['Fare', 'SibSp', 'Parch'] hdf = sdf.toHandy() hres = hdf._handy._calc_mahalanobis_distance(colnames).toHandy().cols['__mahalanobis'][:].values pipeline = make_pipeline(StandardScaler()) pdf = pd.DataFrame(pipeline.fit_transform(pdf[colnames]), columns=colnames) invmat = np.linalg.inv(pdf.cov()) res = pdf.apply(lambda row: distance.mahalanobis(row.values, np.zeros_like(row.values), invmat), axis=1) npt.assert_array_almost_equal(hres, res, decimal=4) def test_entropy(sdf, pdf): hdf = sdf.toHandy() hres = hdf.cols['Pclass'].entropy() res = stats.entropy(pdf.groupby('Pclass').count().iloc[:, 0], base=2) npt.assert_array_almost_equal(hres, res) def test_mutual_info(sdf, pdf): hdf = sdf.toHandy() hres = hdf.cols[['Survived', 'Pclass']].mutual_info() res = mutual_info_score(pdf['Survived'], pdf['Pclass']) # converts to log2 res = np.log2(np.exp(res)) npt.assert_array_almost_equal(hres.loc['Survived', 'Pclass'], res) ================================================ FILE: tests/handyspark/sql/test_datetime.py ================================================ import numpy.testing as npt from handyspark import * def test_is_leap_year(sdates, pdates): hdf = sdates.toHandy() hdf = hdf.assign(newcol=hdf.pandas['dates'].dt.is_leap_year) hres = hdf.cols['newcol'][:20] res = pdates['dates'].dt.is_leap_year[:20] 
npt.assert_array_equal(hres, res) def test_strftime(sdates, pdates): hdf = sdates.toHandy() hdf = hdf.assign(newcol=hdf.pandas['dates'].dt.strftime(date_format='%Y-%m')) hres = hdf.cols['newcol'][:20] res = pdates['dates'].dt.strftime(date_format='%Y-%m')[:20] npt.assert_array_equal(hres, res) def test_weekday_name(sdates, pdates): hdf = sdates.toHandy() hdf = hdf.assign(newcol=hdf.pandas['dates'].dt.weekday_name) hres = hdf.cols['newcol'][:20] res = pdates['dates'].dt.weekday_name[:20] npt.assert_array_equal(hres, res) def test_round(sdates, pdates): hdf = sdates.toHandy() hdf = hdf.assign(newcol=hdf.pandas['dates'].dt.round(freq='D')) hres = hdf.cols['newcol'][:20] res = pdates['dates'].dt.round(freq='D')[:20] npt.assert_array_equal(hres, res) ================================================ FILE: tests/handyspark/sql/test_pandas.py ================================================ import numpy.testing as npt from handyspark import * # boolean returns def test_between(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Age'].between(left=20, right=40)) hres = hdf.cols['newcol'][:20] res = pdf['Age'].between(left=20, right=40)[:20] npt.assert_array_equal(hres, res) def test_isin(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Age'].isin(values=[22, 40])) hres = hdf.cols['newcol'][:20] res = pdf['Age'].isin(values=[22, 40])[:20] npt.assert_array_equal(hres, res) def test_isna(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Cabin'].isna()) hres = hdf.cols['newcol'][:20] res = pdf['Cabin'].isna()[:20] npt.assert_array_equal(hres, res) def test_notna(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Cabin'].notna()) hres = hdf.cols['newcol'][:20] res = pdf['Cabin'].notna()[:20] npt.assert_array_equal(hres, res) # same type returns def test_clip(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Age'].clip(lower=5, upper=50)) hres = hdf.cols['newcol'][:20] res = 
pdf['Age'].clip(lower=5, upper=50)[:20] npt.assert_array_equal(hres, res) def test_replace(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Age'].replace(to_replace=5, value=0)) hres = hdf.cols['newcol'][:20] res = pdf['Age'].replace(to_replace=5, value=0)[:20] npt.assert_array_equal(hres, res) def test_round(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Fare'].round(decimals=0)) hres = hdf.cols['newcol'][:20] res = pdf['Fare'].round(decimals=0)[:20] npt.assert_array_equal(hres, res) ================================================ FILE: tests/handyspark/sql/test_schema.py ================================================ import numpy as np import numpy.testing as npt from handyspark.sql import generate_schema def test_generate_schema(sdf): res = sdf.select(sorted(sdf.columns)).schema hres = generate_schema(dict(zip(sdf.columns, [np.int32, np.int32, np.int32, str, str, np.float64, np.int32, np.int32, str, np.float64, str, str]))) npt.assert_array_equal(hres, res) ================================================ FILE: tests/handyspark/sql/test_string.py ================================================ import numpy.testing as npt from handyspark import * # integer returns def test_count(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.count(pat='Mr.')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.count(pat='Mr.')[:20] npt.assert_array_equal(hres, res) def test_find(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.find(sub='Mr.')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.find(sub='Mr.')[:20] npt.assert_array_equal(hres, res) def test_len(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.len()) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.len()[:20] npt.assert_array_equal(hres, res) def test_rfind(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.rfind(sub='Mr.')) hres = 
hdf.cols['newcol'][:20] res = pdf['Name'].str.rfind(sub='Mr.')[:20] npt.assert_array_equal(hres, res) # boolean returns def test_contains(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.contains(pat='Mr.')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.contains(pat='Mr.')[:20] npt.assert_array_equal(hres, res) def test_startswith(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.startswith(pat='Mr.')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.startswith(pat='Mr.')[:20] npt.assert_array_equal(hres, res) def test_match(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.match(pat='Mr.')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.match(pat='Mr.')[:20] npt.assert_array_equal(hres, res) def test_isalpha(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.isalpha()) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.isalpha()[:20] npt.assert_array_equal(hres, res) # string returns def test_replace(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.replace(pat='Mr.', repl='Mister')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.replace(pat='Mr.', repl='Mister')[:20] npt.assert_array_equal(hres, res) def test_repeat(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.repeat(repeats=2)) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.repeat(repeats=2)[:20] npt.assert_array_equal(hres, res) def test_join(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.join(sep=',')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.join(sep=',')[:20] npt.assert_array_equal(hres, res) def test_pad(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.pad(width=20)) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.pad(width=20)[:20] npt.assert_array_equal(hres, res) def test_slice(sdf, pdf): hdf = sdf.toHandy() hdf = 
hdf.assign(newcol=hdf.pandas['Name'].str.slice(start=5, stop=10)) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.slice(start=5, stop=10)[:20] npt.assert_array_equal(hres, res) def test_slice_replace(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.slice_replace(start=5, stop=10, repl='X')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.slice_replace(start=5, stop=10, repl='X')[:20] npt.assert_array_equal(hres, res) def test_strip(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.strip()) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.strip()[:20] npt.assert_array_equal(hres, res) def test_wrap(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.wrap(width=5)) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.wrap(width=5)[:20] npt.assert_array_equal(hres, res) def test_get(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.get(i=5)) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.get(i=5)[:20] npt.assert_array_equal(hres, res) def test_center(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.center(width=10)) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.center(width=10)[:20] npt.assert_array_equal(hres, res) def test_zfill(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.zfill(width=20)) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.zfill(width=20)[:20] npt.assert_array_equal(hres, res) def test_normalize(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.normalize(form='NFKD')) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.normalize(form='NFKD')[:20] npt.assert_array_equal(hres, res) def test_upper(sdf, pdf): hdf = sdf.toHandy() hdf = hdf.assign(newcol=hdf.pandas['Name'].str.upper()) hres = hdf.cols['newcol'][:20] res = pdf['Name'].str.upper()[:20] npt.assert_array_equal(hres, res) 
================================================ FILE: tests/handyspark/sql/test_transform.py ================================================ import numpy.testing as npt from pyspark.sql.types import DoubleType, StringType from handyspark import * def test_apply_axis0(sdf, pdf): hdf = sdf.toHandy() # setting the type manually hres1 = hdf.apply(lambda Fare: Fare.map('${:,.2f}'.format), 'new', returnType='string').cols['new'][:] # setting the type using an extension hres2 = hdf.apply(StringType.ret(lambda Fare: Fare.map('${:,.2f}'.format)), 'new').cols['new'][:] res = pdf.Fare.map('${:,.2f}'.format) npt.assert_array_equal(hres1, res) npt.assert_array_equal(hres2, res) def test_apply_axis1(sdf, pdf): hdf = sdf.toHandy() # setting the type manually hres1 = hdf.apply(lambda Fare, Age: Fare / Age, 'new', returnType='double').cols['new'][:] # setting the type using an extension hres2 = hdf.apply(DoubleType.ret(lambda Fare, Age: Fare / Age), 'new').cols['new'][:] # inferring type from 1st argument hres3 = hdf.apply(lambda Fare, Age: Fare / Age, 'new').cols['new'][:] res = pdf.apply(lambda row: row.Fare / row.Age, axis=1) npt.assert_array_equal(hres1, res) npt.assert_array_equal(hres2, res) npt.assert_array_equal(hres3, res) def test_transform_axis0(sdf, pdf): hdf = sdf.toHandy() # setting the type manually hres1 = hdf.transform(lambda Fare: Fare.map('${:,.2f}'.format), 'new', returnType='string').cols['new'][:] # setting the type using an extension hres2 = hdf.transform(StringType.ret(lambda Fare: Fare.map('${:,.2f}'.format)), 'new').cols['new'][:] res = pdf.Fare.map('${:,.2f}'.format) npt.assert_array_equal(hres1, res) npt.assert_array_equal(hres2, res) def test_transform_axis1(sdf, pdf): hdf = sdf.toHandy() # setting the type manually hres1 = hdf.transform(lambda Fare, Age: Fare / Age, 'new', returnType='double').cols['new'][:] # setting the type using an extension hres2 = hdf.transform(DoubleType.ret(lambda Fare, Age: Fare / Age), 'new').cols['new'][:] # inferring type 
from 1st argument hres3 = hdf.transform(lambda Fare, Age: Fare / Age, 'new').cols['new'][:] res = pdf.apply(lambda row: row.Fare / row.Age, axis=1) npt.assert_array_equal(hres1, res) npt.assert_array_equal(hres2, res) npt.assert_array_equal(hres3, res) def test_assign_axis0(sdf, pdf): hdf = sdf.toHandy() # setting the type using an extension hres = hdf.assign(new=StringType.ret(lambda Fare: Fare.map('${:,.2f}'.format))).cols['new'][:] res = pdf.assign(new=pdf.Fare.map('${:,.2f}'.format))['new'] npt.assert_array_equal(hres, res) def test_assign_axis1(sdf, pdf): hdf = sdf.toHandy() # inferring type from 1st argument hres1 = hdf.assign(new=lambda Fare, Age: Fare / Age).cols['new'][:] # setting the type using an extension hres2 = hdf.assign(new=DoubleType.ret(lambda Fare, Age: Fare / Age)).cols['new'][:] res = pdf.assign(new=pdf.Fare / pdf.Age)['new'] npt.assert_array_almost_equal(hres1, res) npt.assert_array_almost_equal(hres2, res) ================================================ FILE: tests/handyspark/test_plot.py ================================================ import base64 import numpy.testing as npt import numpy as np import seaborn as sns from handyspark import * from handyspark.plot import consolidate_plots, strat_histogram from io import BytesIO from matplotlib import pyplot as plt def plot_to_base64(fig): bytes_data = BytesIO() fig.savefig(bytes_data, format='png') bytes_data.seek(0) b64_data = base64.b64encode(bytes_data.read()) plt.close(fig) return b64_data def plot_to_pixels(fig, shape=None): fig.canvas.draw() rgb_data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') plt.clf() plt.cla() plt.close(fig) if shape is None: rgb_data = rgb_data.reshape((int(len(rgb_data) / 3), 3)) else: rgb_data = rgb_data.reshape(shape) return rgb_data def test_boxplot_single(sdf, pdf): pax = pdf[['Fare']].boxplot(showfliers=False) pax.legend().remove() pax.set_ylabel('') p64 = plot_to_base64(pax.figure) hdf = sdf.toHandy() sax = 
hdf.cols['Fare'].boxplot(showfliers=False, precision=.0001) s64 = plot_to_base64(sax.figure) npt.assert_equal(p64, s64) def test_boxplot_multiple(sdf, pdf): pax = pdf[['Fare', 'Age']].boxplot(showfliers=False) pax.legend().remove() pax.set_ylabel('') p64 = plot_to_pixels(pax.figure, (480, 640, 3)) # Spark computes quartiles approximately, # so it results in a small difference between the plots hdf = sdf.toHandy() sax = hdf.cols[['Fare', 'Age']].boxplot(showfliers=False, precision=.0001) s64 = plot_to_pixels(sax.figure, (480, 640, 3)) diff = s64 - p64 npt.assert_equal(diff.sum(), 110414) npt.assert_equal((diff != 0).sum(), 871) def test_hist_categorical(sdf, pdf): hdf = sdf.toHandy() sax = hdf.dropna(subset=['Embarked']).cols['Embarked'].hist() s64 = plot_to_base64(sax.figure) pdf = pdf.groupby(['Embarked'])['PassengerId'].count().sort_index() pax = pdf.plot(kind='bar', color='C0', legend=False, rot=0, ax=None, title='Embarked') p64 = plot_to_base64(pax.figure) npt.assert_equal(p64, s64) def test_hist_continuous(sdf, pdf): hdf = sdf.toHandy() sax = hdf.cols['Fare'].hist(bins=5) s64 = plot_to_base64(sax.figure) pax = pdf[['Fare']].plot.hist(bins=5) pax.legend().remove() pax.set_ylabel('') pax.set_title('Fare') p64 = plot_to_base64(pax.figure) npt.assert_equal(p64, s64) def test_scatterplot(sdf, pdf): hdf = sdf.toHandy() sax = hdf.fillna({'Age': 29.0}).cols[['Fare', 'Age']].scatterplot() sax.set_xlim([0, 515]) sax.set_ylim([0, 85]) s64 = plot_to_pixels(sax.figure, (480, 640, 3)) # Traditional plot is not bucketized! 
pdf = pdf.fillna({'Age': 29.0}) df_counts = pdf.groupby(['Fare', 'Age'])['PassengerId'].count().to_frame('Proportion') df_counts.loc[:, 'Proportion'] = df_counts['Proportion'].apply(lambda p: round(p / 891, 4)) pax = sns.scatterplot(data=df_counts.reset_index(), x='Fare', y='Age', size='Proportion', legend=False) pax.set_xlim([0, 515]) pax.set_ylim([0, 85]) p64 = plot_to_pixels(pax.figure, (480, 640, 3)) # Differences arise from bucketized vs not bucketized scatterplots diff = s64 - p64 npt.assert_equal(diff.sum(), 4759745) npt.assert_equal((diff != 0).sum(), 45616) def test_stratified_boxplot(sdf, pdf): hdf = sdf.toHandy() sfig, _ = hdf.stratify(['Pclass']).cols['Fare'].boxplot(showfliers=False, precision=.0001) s64 = plot_to_pixels(sfig, (480, 640, 3)) pax = pdf.boxplot('Fare', by='Pclass', showfliers=False) pax.set_xlabel('') plt.suptitle('') plt.xticks([1, 2, 3], ['Pclass={}'.format(i) for i in [1, 2, 3]]) plt.tight_layout() p64 = plot_to_pixels(pax.figure, (480, 640, 3)) # Differences arise from quantile calculations diff = s64 - p64 npt.assert_equal(diff.sum(), 276595) npt.assert_equal((diff != 0).sum(), 2146) def test_stratified_hist(sdf, pdf): hdf = sdf.toHandy() bins, _ = strat_histogram(hdf, 'Fare', bins=10, categorical=False) sfig, _ = hdf.stratify(['Pclass', 'Embarked']).cols['Fare'].hist() s64 = plot_to_pixels(sfig, (480, 640, 3)) paxes = pdf.groupby(['Pclass', 'Embarked'])['Fare'].hist() pfig, axes = plt.subplots(3, 3) axes = [ax for row in axes for ax in row] idx = 0 clauses = [] for embarked in ['C', 'Q', 'S']: for pclass in [1, 2, 3]: clause = 'Pclass == {} and Embarked == "{}"'.format(pclass, embarked) clauses.append(clause) pdf.query(clause)['Fare'].hist(ax=axes[idx], bins=bins) axes[idx].grid(False) idx += 1 pfig, _ = consolidate_plots(pfig, axes, 'Fare', clauses) p64 = plot_to_pixels(pfig, (480, 640, 3)) npt.assert_equal(s64, p64) ================================================ FILE: tests/handyspark/test_stats.py 
================================================ import numpy.testing as npt from handyspark.stats import KolmogorovSmirnovTest from pyspark.sql import functions as F def test_ks(sdf): # generates uniform sdf = sdf.withColumn('rand', F.rand(42)) # compares with uniform,it should NOT reject pval = KolmogorovSmirnovTest(sdf, 'rand', dist='uniform').pValue npt.assert_equal(pval > .05, True) # compares with normal, it SHOULD reject pval = KolmogorovSmirnovTest(sdf, 'rand').pValue npt.assert_equal(pval < .05, True) # generates normal sdf = sdf.withColumn('rand', F.randn(42)) # compares with normal, it should NOT reject pval = KolmogorovSmirnovTest(sdf, 'rand').pValue npt.assert_equal(pval > .05, True) # compares with uniform, it SHOULD reject pval = KolmogorovSmirnovTest(sdf, 'rand', dist='uniform').pValue npt.assert_equal(pval < .05, True) ================================================ FILE: tests/handyspark/test_util.py ================================================ import numpy.testing as npt from pyspark.ml.feature import VectorAssembler from handyspark.util import dense_to_array, disassemble def test_dense_to_array(sdf): assem = VectorAssembler(inputCols=['Pclass', 'Fare', 'Age'], outputCol='features') tdf = assem.transform(sdf.dropna()) tdf = dense_to_array(tdf, 'features', 'array_features') npt.assert_array_equal(tdf.cols['features'][:], tdf.cols['array_features'][:]) def test_disassemble(sdf): assem = VectorAssembler(inputCols=['Pclass', 'Fare', 'Age'], outputCol='features') tdf = assem.transform(sdf.dropna()) tdf = disassemble(tdf, 'features') npt.assert_array_equal(tdf.cols['Pclass'][:], tdf.cols['features_0'][:]) npt.assert_array_equal(tdf.cols['Fare'][:], tdf.cols['features_1'][:]) npt.assert_array_equal(tdf.cols['Age'][:], tdf.cols['features_2'][:]) ================================================ FILE: tests/rawdata/train.csv ================================================ PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 
1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S 2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C 3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S 4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S 5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S 6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q 7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S 8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S 9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S 10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C 11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S 12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S 13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S 14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S 15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S 16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S 17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q 18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S 19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S 20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C 21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S 22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S 23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,,Q 24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,A6,S 25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S 26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S 27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C 28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S 29,1,3,"O'Dwyer, Miss. 
Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q 30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S 31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,,C 32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C 33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q 34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S 35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C 36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S 37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C 38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S 39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S 40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C 41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S 42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S 43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C 44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C 45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q 46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S 47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q 48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q 49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C 50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S 51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S 52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,,S 53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C 54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S 55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C 56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S 57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S 58,0,3,"Novel, Mr. 
Mansouer",male,28.5,0,0,2697,7.2292,,C 59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S 60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S 61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C 62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28, 63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S 64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S 65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C 66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C 67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S 68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S 69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S 70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S 71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S 72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S 73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S 74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,,C 75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S 76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S 77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S 78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S 79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,,S 80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,,S 81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S 82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S 83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q 84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S 85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S 86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S 87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,,S 88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S 89,1,1,"Fortune, Miss. 
Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S 90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S 91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S 92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S 93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S 94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S 95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S 96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S 97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C 98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C 99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S 100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S 101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S 102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S 103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S 104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S 105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S 106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S 107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S 108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S 109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S 110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q 111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,C110,S 112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C 113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S 114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S 115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,,C 116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S 117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q 118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S 119,0,1,"Baxter, Mr. 
Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C 120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S 121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S 122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S 123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C 124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S 125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S 126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C 127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q 128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S 129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C 130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S 131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C 132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,,S 133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S 134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S 135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S 136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C 137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S 138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S 139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S 140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C 141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C 142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,,S 143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S 144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q 145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S 146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S 147,1,3,"Andersson, Mr. 
August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S 148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S 149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S 150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S 151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S 152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S 153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S 154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S 155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S 156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C 157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q 158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S 159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S 160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S 161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S 162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S 163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S 164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S 165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S 166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S 167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,E33,S 168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S 169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S 170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S 171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,B19,S 172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q 173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S 174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S 175,0,1,"Smith, Mr. 
James Clinch",male,56,0,0,17764,30.6958,A7,C 176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S 177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,,S 178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C 179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S 180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S 181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S 182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C 183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S 184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S 185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S 186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S 187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q 188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S 189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q 190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S 191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S 192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S 193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S 194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,F2,S 195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C 196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C 197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,,Q 198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S 199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q 200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S 201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S 202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S 203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S 204,0,3,"Youseff, Mr. 
Gerious",male,45.5,0,0,2628,7.225,,C 205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S 206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S 207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S 208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C 209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q 210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C 211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S 212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S 213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S 214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S 215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q 216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C 217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S 218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S 219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C 220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S 221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S 222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S 223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S 224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S 225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S 226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S 227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,,S 228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S 229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S 230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S 231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S 232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S 233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S 234,1,3,"Asplund, Miss. 
Lillian Gertrud",female,5,4,2,347077,31.3875,,S 235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S 236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S 237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S 238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S 239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S 240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S 241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C 242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q 243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S 244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S 245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C 246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q 247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S 248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,,S 249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S 250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S 251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S 252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S 253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S 254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S 255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S 256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C 257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C 258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S 259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C 260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S 261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q 262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S 263,0,1,"Taussig, Mr. 
Emil",male,52,1,1,110413,79.65,E67,S 264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,B94,S 265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q 266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S 267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S 268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S 269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S 270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S 271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S 272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S 273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S 274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C 275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q 276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S 277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S 278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S 279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q 280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S 281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,,Q 282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S 283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S 284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,,S 285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,A19,S 286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C 287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S 288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S 289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S 290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q 291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S 292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C 293,0,2,"Levy, Mr. 
Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C 294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S 295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S 296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,,C 297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C 298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S 299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S 300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C 301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q 302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q 303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S 304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q 305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S 306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S 307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C 308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C 309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C 310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C 311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C 312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C 313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S 314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S 315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S 316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S 317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S 318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S 319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S 320,1,1,"Spedden, Mrs. 
Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C 321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S 322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S 323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q 324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S 325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S 326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C 327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,,S 328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S 329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S 330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C 331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q 332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S 333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S 334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S 335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S 336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S 337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S 338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C 339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S 340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,T,S 341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S 342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S 343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S 344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S 345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S 346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S 347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S 348,1,3,"Davison, Mrs. 
Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S 349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S 350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S 351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S 352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S 353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,,C 354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,,S 355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C 356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S 357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S 358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S 359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q 360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q 361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S 362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C 363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C 364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S 365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q 366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S 367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C 368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C 369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q 370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C 371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C 372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S 373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S 374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C 375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S 376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C 377,1,3,"Landergren, Miss. 
Aurora Adelia",female,22,0,0,C 7077,7.25,,S 378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C 379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C 380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,,S 381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C 382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C 383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S 384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S 385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S 386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,,S 387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S 388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S 389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q 390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C 391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S 392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S 393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S 394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C 395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S 396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S 397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S 398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,,S 399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S 400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S 401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S 402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S 403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S 404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S 405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S 406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S 407,0,3,"Widegren, Mr. 
Carl/Charles Peter",male,51,0,0,347064,7.75,,S 408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S 409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S 410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S 411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S 412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q 413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q 414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S 415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S 416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S 417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S 418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S 419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S 420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S 421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C 422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q 423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S 424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S 425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S 426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S 427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S 428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S 429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q 430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S 431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S 432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S 433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S 434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S 435,0,1,"Silvey, Mr. 
William Baird",male,50,1,0,13507,55.9,E44,S 436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S 437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S 438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S 439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S 440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S 441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S 442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S 443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S 444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S 445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S 446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S 447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S 448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S 449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C 450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S 451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S 452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S 453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C111,C 454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C 455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S 456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C 457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S 458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S 459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,,S 460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q 461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S 462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S 463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S 464,0,2,"Milling, Mr. 
Jacob Christian",male,48,0,0,234360,13,,S 465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S 466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S 467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S 468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S 469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q 470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C 471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S 472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S 473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S 474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C 475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S 476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S 477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,,S 478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S 479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S 480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S 481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S 482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S 483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,,S 484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S 485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C 486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S 487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S 488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C 489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,,S 490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S 491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S 492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S 493,0,1,"Molson, Mr. 
Harry Markland",male,55,0,0,113787,30.5,C30,S 494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C 495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S 496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C 497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C 498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S 499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S 500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S 501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S 502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q 503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q 504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S 505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S 506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C 507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S 508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S 509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S 510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S 511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,,Q 512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S 513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,E25,S 514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C 515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S 516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S 517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S 518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q 519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S 520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S 521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S 522,0,3,"Vovk, Mr. 
Janko",male,22,0,0,349252,7.8958,,S 523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C 524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C 525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C 526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q 527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S 528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S 529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S 530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S 531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S 532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C 533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C 534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C 535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S 536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S 537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,B38,S 538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,,C 539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S 540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C 541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S 542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S 543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S 544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S 545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C 546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S 547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S 548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C 549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S 550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S 551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C 552,0,2,"Sharp, Mr. 
Percival James R",male,27,0,0,244358,26,,S 553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q 554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C 555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S 556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S 557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C 558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C 559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S 560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S 561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q 562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S 563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S 564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S 565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S 566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,,S 567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S 568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S 569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C 570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S 571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S 572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S 573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S 574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q 575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S 576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S 577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S 578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S 579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C 580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S 581,1,2,"Christy, Miss. 
Julie Rachel",female,25,1,1,237789,30,,S 582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C 583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S 584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C 585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C 586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S 587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S 588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,B41,C 589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S 590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S 591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,,S 592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C 593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,,S 594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q 595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S 596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S 597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,,S 598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S 599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C 600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C 601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S 602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S 603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S 604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S 605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C 606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S 607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S 608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S 609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C 610,1,1,"Shutes, Miss. 
Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S 611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S 612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S 613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q 614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q 615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S 616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S 617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S 618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S 619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,F4,S 620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S 621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C 622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S 623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C 624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S 625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S 626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,D50,S 627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q 628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S 629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,,S 630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q 631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S 632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S 633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C 634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S 635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S 636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S 637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S 638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S 639,0,3,"Panula, Mrs. 
Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S 640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S 641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S 642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C 643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,,S 644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S 645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C 646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C 647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,,S 648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C 649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S 650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S 651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S 652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S 653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S 654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q 655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q 656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S 657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S 658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q 659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S 660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C 661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,,S 662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C 663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S 664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S 665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S 666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S 667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S 668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S 669,0,3,"Cook, Mr. 
Jacob",male,43,0,0,A/5 3536,8.05,,S 670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S 671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S 672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S 673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S 674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,,S 675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S 676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S 677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S 678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S 679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S 680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C 681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q 682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C 683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S 684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S 685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,,S 686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C 687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S 688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S 689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S 690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S 691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,B20,S 692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C 693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S 694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C 695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,,S 696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S 697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S 698,1,3,"Mullens, Miss. 
Katherine ""Katie""",female,,0,0,35852,7.7333,,Q 699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C 700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S 701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C 702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S 703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C 704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q 705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S 706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S 707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S 708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S 709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S 710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C 711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C 712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S 713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,C126,S 714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S 715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S 716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S 717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C 718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S 719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q 720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S 721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,,S 722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S 723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S 724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S 725,1,1,"Chambers, Mr. 
Norman Campbell",male,27,1,0,113806,53.1,E8,S 726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S 727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S 728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q 729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S 730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S 731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S 732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C 733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S 734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S 735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S 736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S 737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S 738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C 739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S 740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S 741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S 742,0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,C46,S 743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C 744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S 745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,,S 746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S 747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S 748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S 749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S 750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q 751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S 752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,E121,S 753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S 754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,,S 755,1,2,"Herman, Mrs. 
Samuel (Jane Laver)",female,48,1,2,220845,65,,S 756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S 757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S 758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S 759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S 760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S 761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S 762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S 763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C 764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S 765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S 766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S 767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C 768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q 769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q 770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S 771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,,S 772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S 773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S 774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C 775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S 776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S 777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q 778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S 779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q 780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S 781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,,C 782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S 783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S 784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 
6607,23.45,,S 785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S 786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S 787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S 788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q 789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S 790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C 791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q 792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S 793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S 794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C 795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S 796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S 797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S 798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,,S 799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C 800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S 801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S 802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S 803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S 804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C 805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S 806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S 807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S 808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,,S 809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S 810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S 811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S 812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S 813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,,S 814,0,3,"Andersson, Miss. 
Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S 815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S 816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S 817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S 818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C 819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S 820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S 821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S 822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S 823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S 824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S 825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S 826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q 827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S 828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C 829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q 830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28, 831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C 832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S 833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C 834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S 835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,,S 836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C 837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S 838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S 839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S 840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C 841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S 842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S 843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C 844,0,3,"Lemberopolous, Mr. 
Peter L",male,34.5,0,0,2683,6.4375,,C 845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,,S 846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S 847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S 848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C 849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S 850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C 851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S 852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S 853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C 854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S 855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S 856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S 857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S 858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,E17,S 859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C 860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C 861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S 862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S 863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S 864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S 865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S 866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S 867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C 868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S 869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S 870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S 871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S 872,1,1,"Beckwith, Mrs. 
Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S 873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S 874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S 875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C 876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C 877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S 878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S 879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S 880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C 881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S 882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S 883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S 884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S 885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S 886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,,Q 887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S 888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,B42,S 889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S 890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C 891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,,Q