Repository: conversationai/conversationai-models
Branch: main
Commit: d3a724c96e24
Files: 196
Total size: 953.6 KB

Directory structure:
gitextract__2536wl_/

├── .bazelrc
├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── annotator_models/
│   ├── README.md
│   ├── bin/
│   │   ├── cancel-job
│   │   ├── ls-jobs
│   │   ├── run
│   │   ├── run_local
│   │   └── stream-logs
│   ├── cpu_config.yaml
│   ├── requirements.txt
│   ├── results/
│   │   └── .gitignore
│   └── trainer/
│       ├── __init__.py
│       ├── dawid_skene.py
│       └── dawid_skene_test.py
├── attention-tutorial/
│   ├── Attention_Model_Tutorial.ipynb
│   ├── README.md
│   ├── checkpoints/
│   │   └── README.md
│   ├── data/
│   │   └── README.md
│   ├── process_figshare.py
│   ├── requirements.txt
│   └── visualize_attention.py
├── data_preparation/
│   ├── README.md
│   ├── config.ini
│   ├── preprocessing/
│   │   ├── __init__.py
│   │   ├── constants.py
│   │   ├── preprocessing.py
│   │   └── tfrecord_utils.py
│   ├── requirements.txt
│   ├── run_preprocessing_artificial_bias.py
│   ├── run_preprocessing_data_split.py
│   └── setup.py
├── experiments/
│   ├── .gitignore
│   ├── README.md
│   ├── WORKSPACE
│   ├── __init__.py
│   ├── requirements.txt
│   ├── setup.py
│   ├── testdata/
│   │   ├── BUILD
│   │   ├── cats_and_dogs.jsonl
│   │   ├── cats_and_dogs_onehot.vocab.txt
│   │   ├── cats_and_dogs_with_cat_opt_int_labels.jsonl
│   │   └── cats_and_dogs_with_partial_cat_int_labels.jsonl
│   ├── tf_trainer/
│   │   ├── __init__.py
│   │   ├── common/
│   │   │   ├── BUILD
│   │   │   ├── __init__.py
│   │   │   ├── base_model.py
│   │   │   ├── basic_gpu_config.yaml
│   │   │   ├── cnn_spec_parser.py
│   │   │   ├── cnn_spec_parser_test.py
│   │   │   ├── dataset_config.sh
│   │   │   ├── dataset_input.py
│   │   │   ├── episodic_tfrecord_input.py
│   │   │   ├── episodic_tfrecord_input_test.py
│   │   │   ├── model_trainer.py
│   │   │   ├── p100_config.yaml
│   │   │   ├── serving_input.py
│   │   │   ├── text_preprocessor.py
│   │   │   ├── text_preprocessor_test.py
│   │   │   ├── tfrecord_input.py
│   │   │   ├── tfrecord_input_test.py
│   │   │   ├── token_embedding_index.py
│   │   │   ├── token_embedding_index_test.py
│   │   │   ├── types.py
│   │   │   └── v100_config.yaml
│   │   ├── tf_char_cnn/
│   │   │   ├── __init__.py
│   │   │   ├── hparam_config.yaml
│   │   │   ├── hparam_config_civil_comments.yaml
│   │   │   ├── hparam_config_many_communities.yaml
│   │   │   ├── hparam_config_toxicity.yaml
│   │   │   ├── model.py
│   │   │   ├── run.deploy.sh
│   │   │   ├── run.hyperparameter.sh
│   │   │   ├── run.local.sh
│   │   │   ├── run.ml_engine.sh
│   │   │   └── run.py
│   │   ├── tf_cnn/
│   │   │   ├── __init__.py
│   │   │   ├── finetune.py
│   │   │   ├── finetune.sh
│   │   │   ├── hparam_config.yaml
│   │   │   ├── hparam_config_civil_comments.yaml
│   │   │   ├── hparam_config_many_communities.yaml
│   │   │   ├── hparam_config_many_communities_40_per_8_shot.yaml
│   │   │   ├── hparam_config_toxicity.yaml
│   │   │   ├── model.py
│   │   │   ├── run.deploy.sh
│   │   │   ├── run.hyperparameter.sh
│   │   │   ├── run.local.sh
│   │   │   ├── run.ml_engine.sh
│   │   │   └── run.py
│   │   ├── tf_gru_attention/
│   │   │   ├── __init__.py
│   │   │   ├── finetune.py
│   │   │   ├── finetune.sh
│   │   │   ├── hparam_config.yaml
│   │   │   ├── hparam_config_civil_comments.yaml
│   │   │   ├── hparam_config_many_communities.yaml
│   │   │   ├── hparam_config_many_communities_40_per_8_shot.yaml
│   │   │   ├── hparam_config_toxicity.yaml
│   │   │   ├── model.py
│   │   │   ├── run.deploy.sh
│   │   │   ├── run.hyperparameter.sh
│   │   │   ├── run.local.sh
│   │   │   ├── run.ml_engine.sh
│   │   │   └── run.py
│   │   ├── tf_hub_classifier/
│   │   │   ├── __init__.py
│   │   │   ├── finetune.py
│   │   │   ├── finetune.sh
│   │   │   ├── hparam_config.yaml
│   │   │   ├── hparam_config_civil_comments.yaml
│   │   │   ├── hparam_config_many_communities.yaml
│   │   │   ├── hparam_config_many_communities_40_per_8_shot.yaml
│   │   │   ├── hparam_config_toxicity.yaml
│   │   │   ├── model.py
│   │   │   ├── run.deploy.sh
│   │   │   ├── run.hyperparameter.sh
│   │   │   ├── run.local.sh
│   │   │   ├── run.ml_engine.sh
│   │   │   └── run.py
│   │   ├── tf_hub_tfjs/
│   │   │   ├── __init__.py
│   │   │   ├── model.py
│   │   │   ├── notebook/
│   │   │   │   ├── BiasEvaluation.ipynb
│   │   │   │   └── EvaluatingClassifier.ipynb
│   │   │   ├── run.local.sh
│   │   │   └── run.py
│   │   ├── tf_kona_prototypical_network/
│   │   │   └── proto.py
│   │   └── tf_word_label_embedding/
│   │       ├── __init__.py
│   │       ├── hparam_config.yaml
│   │       ├── model.py
│   │       ├── run.hyperparameter.sh
│   │       ├── run.local.sh
│   │       ├── run.ml_engine.sh
│   │       └── run.py
│   └── tools/
│       ├── bert_tfrecord_converter.py
│       ├── convert_csv_to_tfrecord.py
│       └── convert_jsonl_to_tfrecord.py
├── hierarchical_attention_research/
│   └── han_model/
│       ├── .gitignore
│       ├── HAN_model.py
│       ├── LICENSE
│       ├── README.md
│       ├── bn_lstm.py
│       ├── bn_lstm_test.py
│       ├── data_util.py
│       ├── model_components.py
│       ├── requirements.txt
│       ├── worker.py
│       ├── yelp.py
│       └── yelp_prepare.py
├── kaggle-classification/
│   ├── .gitignore
│   ├── README.md
│   ├── __init__.py
│   ├── bin/
│   │   ├── cancel-job
│   │   ├── ls-jobs
│   │   ├── run
│   │   ├── run_keras.sh
│   │   ├── run_keras_local.sh
│   │   ├── run_local
│   │   └── stream-logs
│   ├── config.yaml
│   ├── gpu_config.yaml
│   ├── hparam_config.yaml
│   ├── keras_hparam_config.yaml
│   ├── keras_trainer/
│   │   ├── __init__.py
│   │   ├── base_model.py
│   │   ├── cnn_with_attention.py
│   │   ├── custom_metrics.py
│   │   ├── model.py
│   │   ├── rnn.py
│   │   └── single_layer_cnn.py
│   ├── requirements.txt
│   ├── setup.py
│   └── trainer/
│       ├── __init__.py
│       ├── model.py
│       └── wikidata.py
├── model_evaluation/
│   ├── BiosBias Evaluation.ipynb
│   ├── Predict bias.ipynb
│   ├── README.md
│   ├── deploy_models.sh
│   ├── few_shot_learning_baseline_evaluation.ipynb
│   ├── input_fn_example.py
│   ├── jigsaw_evaluation_pipeline.ipynb
│   ├── requirements.txt
│   ├── score_bias_data.sh
│   ├── score_scrubbed_data.sh
│   ├── score_test_data.py
│   └── utils_export/
│       ├── __init__.py
│       ├── dataset.py
│       ├── dataset_test.py
│       ├── deploy_list_models.py
│       ├── utils_cloudml.py
│       ├── utils_cloudml_test.py
│       ├── utils_tfrecords.py
│       └── utils_tfrecords_test.py
└── travis_blase_test_support/
    └── bazel_0.18.1-linux-x86_64.deb.sha256

================================================
FILE CONTENTS
================================================

================================================
FILE: .bazelrc
================================================
startup --host_jvm_args=-Xmx2500m
startup --host_jvm_args=-Xms2500m
startup --batch
test --ram_utilization_factor=10

build --verbose_failures
build --spawn_strategy=standalone --genrule_strategy=standalone
test --test_strategy=standalone


================================================
FILE: .gitignore
================================================
# Editor config.
.vscode/

# Python compiled files.
*.pyc

# Virtual Environment files.
.pyenv
.virtualenv
env
.venv

# mypy cache files for type-checking.
.mypy_cache

# Bazel
bazel-bin
bazel-experiments
bazel-genfiles
bazel-out
bazel-testlogs


================================================
FILE: .travis.yml
================================================
language: python

python:
  - "3.5"
  - "3.6"

dist: trusty

addons:
  apt:
    sources:
      - ubuntu-toolchain-r-test
    packages:
      - wget
      - pkg-config

before_install:
  - wget https://github.com/bazelbuild/bazel/releases/download/0.18.1/bazel_0.18.1-linux-x86_64.deb
  - sha256sum -c travis_blase_test_support/bazel_0.18.1-linux-x86_64.deb.sha256
  - sudo dpkg -i bazel_0.18.1-linux-x86_64.deb
  - cd experiments

install:
  - pip install -r requirements.txt

script:
  - bazel test --test_output=streamed ...


================================================
FILE: CONTRIBUTING.md
================================================
# How to contribute

We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution,
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.

## Code reviews

All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult [GitHub Help] for more
information on using pull requests.

[GitHub Help]: https://help.github.com/articles/about-pull-requests/


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright {yyyy} {name of copyright owner}

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
# ConversationAI Models

This repository contains example code to train machine learning models for text classification as part of the [Conversation AI](https://conversationai.github.io/) project.

# Outline of the codebase

* `experiments/` contains the ML training framework.
* `annotator_models/` contains a Dawid-Skene implementation for modelling rater quality to produce better annotations.
* `attention-tutorial/` contains an introductory ipython notebook for RNNs with attention, as presented at Devoxx talk ["Tensorflow, deep learning and modern RNN architectures, without a PhD by Martin Gorner"](https://www.youtube.com/watch?v=pzOzmxCR37I)
* `kaggle-classification/` contains early experiments with Keras and Estimator for training on [the Jigsaw Toxicity Kaggle competition](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge). Will be superseded by `experiments/` shortly.
* `model_evaluation/` contains utilities to use a model deployed on cloud MLE, and some notebooks to illustrate typical evaluation metrics.

## About this code

This repository contains example code to help experiment with models to improve conversations; it is not an official Google product.


================================================
FILE: annotator_models/README.md
================================================
# Modeling Annotators

This is an implementation of the [Dawid-Skene model](http://crowdsourcing-class.org/readings/downloads/ml/EM.pdf). Dawid-Skene is an unsupervised model that can be used to improve the quality of a crowdsourced dataset by learning annotator error rate and predicting the true item labels.

This code was adapted from an [implementation](https://github.com/dallascard/dawid_skene) by [dallascard](https://github.com/dallascard).

## To Run Locally

1.  Setup a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for
    the project (recommended, but technically optional).

    Python 2:

    ```
    python -m virtualenv env
    ```

    Python 3:

    ```
    python3 -m venv env
    ```

    In either case, enter your virtual env with:

    ```shell
    source env/bin/activate
    ```

2.  Install library dependencies:

    ```shell
    pip install -r requirements.txt
    ```

3.  Create training data. The training data must be a CSV that has fields for
    the worker ID, item ID and label. You can specify the column names for these
    fields as flags to the training script.

    For example:
    ```
    comment_id,worker_id,toxic
    1519346288,43675129,0
    1519346288,41122119,0
    1519346288,38510102,0
    1519346288,43650017,0
    1519346288,28524232,0
    ...
    ```

4.  Run a model on a given class (e.g. 'toxic' or 'obscene'). There are examples
    of how to run the model locally and using ml-engine in [`bin/run_local`](bin/run_local) and
    [`bin/run`](bin/run) respectively.

    Note: to run in google cloud, you will need to be authenticated with
    Google Cloud (you can run `gcloud auth application-default login` to do
    this) and you must have access to the cloud bucket where the data is located
    (you can test this by running `gcloud storage ls  gs://kaggle-model-experiments/`).

5. The output is two files written to the `job-dir` directory specified in the run
    script.
   * `error_rates_{LABEL}_{N_ANNOTATIONS}.csv` - the error rates for each annotator
   * `predictions_{LABEL}_{N_ANNOTATIONS}.csv` - the predicted labels for each item

================================================
FILE: annotator_models/bin/cancel-job
================================================
#!/bin/bash

# Cancel an ml-engine job.
#
# Usage: cancel-job JOB_NAME

# Quote the job name so it reaches gcloud as a single, unglobbed argument, and
# stop with a usage hint when it is missing or empty.
gcloud ml-engine jobs cancel "${1:?Usage: cancel-job JOB_NAME}"


================================================
FILE: annotator_models/bin/ls-jobs
================================================
#!/bin/bash

# List the ml-engine jobs whose listing line mentions the current user.

# -F matches $USER as a literal string rather than a regex, "--" protects
# against a value starting with "-", and quoting keeps it a single argument
# even when it is empty or contains whitespace.
gcloud ml-engine jobs list | grep -F -- "$USER"


================================================
FILE: annotator_models/bin/run
================================================
#!/bin/bash

#
# A script to run the Dawid-Skene annotator model remotely using ml-engine.
# One training job is submitted per label listed in LABELS.
#
# To run with the default settings from the annotator_models directory just enter:
# './bin/run'
#
# The config file and the trainer package are given as relative paths, so the
# script must be started from the directory that contains them.
#
# Setup Steps:
# 1. Install the gcloud SDK
# 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]`
# 3. Put the train and test data in Cloud Storage, `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/`
#

# Edit these!
BUCKET_NAME=annotator_models
CONFIG=cpu_config.yaml
MAX_ITER=50
TOLERANCE=1
PSEUDO_COUNT=1

declare -a LABELS=("obscene" "sexual_explicit" "threat" "flirtation" "identity_hate" "insult")

# Note: this must be compatible with cells that have GPUs. us-central1 works.
# See: https://cloud.google.com/ml-engine/docs/using-gpus
REGION=us-central1

# Flags: -c <file> overrides CONFIG, -h prints usage. The leading ':' in the
# optstring enables silent error reporting so the ':' and '?' cases below can
# print their own messages.
while getopts :c:h opt; do
case ${opt} in
h)
    echo "Usage: run [-c config_filename.yaml]"
    echo "Flags: "
    echo -e " -c Specify a config file (e.g. use hparam_config to enable hyperparameter tuning)"
    exit 0;;
c)
    echo "Using custom config ${OPTARG}"
    CONFIG=${OPTARG};;
:)
    echo "Error: ${OPTARG} requires an argument."
    echo "Use 'run -h' for help."
    exit 1;;
\?)
    echo "Invalid flag. Use 'run -h' for help."
    exit 1;;
esac
done


for label in "${LABELS[@]}"
do
  echo "Running on $label"
  # NOTE(review): the input bucket is hard-coded here and does not follow
  # BUCKET_NAME, which only controls where the output is written.
  DATA_PATH=gs://annotator_models/kaggle_annotation_data/dawid_skene_annotations_on_kaggle_combined_${label}.csv
  COMMENT_TEXT_PATH=$DATA_PATH

  JOB_NAME=${USER}_dawid_skene_kaggle_${label}
  DATE=$(date '+%Y%m%d_%H%M%S')
  DATE_DAY_ONLY=$(date '+%Y%m%d')
  OUTPUT_PATH=gs://${BUCKET_NAME}/models/${USER}/${DATE_DAY_ONLY}

  # Report the destination only once it has been computed; echoing it before
  # the loop printed an empty path because OUTPUT_PATH was not yet set.
  echo "Writing to $OUTPUT_PATH"

  # Everything after the bare '--' is passed through to trainer.dawid_skene.
  gcloud ml-engine jobs submit training "${JOB_NAME}_${DATE}" \
         --job-dir="${OUTPUT_PATH}" \
         --runtime-version=1.4 \
         --config="${CONFIG}" \
         --module-name=trainer.dawid_skene \
         --package-path=trainer \
         --region="${REGION}" \
         --verbosity=debug -- \
         --data-path="${DATA_PATH}" \
         --comment-text-path="${COMMENT_TEXT_PATH}" \
         --label="${label}" \
         --max-iter="${MAX_ITER}" \
         --tolerance="${TOLERANCE}" \
         --worker-id-col='annotator_id' \
         --unit-id-col='comment_id' \
         --pseudo-count="${PSEUDO_COUNT}"
done


================================================
FILE: annotator_models/bin/run_local
================================================
#!/bin/bash

# A script to run the Dawid-Skene annotator model locally, once per label.
#
# The trainer package is given as a relative path, so the script must be
# started from the directory that contains it.

BUCKET_NAME=annotator_models

declare -a arr=("obscene" "threat" "insult" "identity_hate" "toxic" "severe_toxic")


## now loop through the above array
for label in "${arr[@]}"
do
  # Plain assignment with no trailing line continuation. With a continuation,
  # removing the blank line below would fold the assignment into the gcloud
  # command as a one-off environment prefix and leave ${data_path} empty.
  data_path=gs://${BUCKET_NAME}/kaggle_annotation_data/dawid_skene_annotations_on_kaggle_test_${label}.csv

  # Everything after the bare '--' is passed through to trainer.dawid_skene.
  gcloud ml-engine local train \
     --module-name=trainer.dawid_skene \
     --package-path=trainer -- \
     --data-path="${data_path}" \
     --comment-text-path="${data_path}" \
     --label="${label}" \
     --job-dir='results' \
     --worker-id-col='annotator_id' \
     --unit-id-col='comment_id' \
     --tolerance=50 \
     --n_examples=1000
done


================================================
FILE: annotator_models/bin/stream-logs
================================================
#!/bin/bash

# Stream the logs of an ml-engine job.
#
# Usage: stream-logs JOB_NAME

# Quote the job name so it reaches gcloud as a single, unglobbed argument, and
# stop with a usage hint when it is missing or empty.
gcloud ml-engine jobs stream-logs "${1:?Usage: stream-logs JOB_NAME}"


================================================
FILE: annotator_models/cpu_config.yaml
================================================
trainingInput:
  scaleTier: CUSTOM
  ## Custom scaleTier needed for using > 1 GPU machines.
  # scaleTier: CUSTOM
  masterType: large_model
  # workerType: complex_model_m_gpu
  # parameterServerType: large_model
  # workerCount: 9
  # parameterServerCount: 3


================================================
FILE: annotator_models/requirements.txt
================================================
absl-py==0.1.12
astor==0.6.2
backports.weakref==1.0.post1
bleach==3.3.0
cachetools==2.0.1
certifi==2024.7.4
chardet==3.0.4
dill==0.2.7.1
enum34==1.1.6
funcsigs==1.0.2
future==0.18.3
futures==3.2.0
gapic-google-cloud-datastore-v1==0.15.3
gapic-google-cloud-error-reporting-v1beta1==0.15.3
gapic-google-cloud-logging-v2==0.91.3
gast==0.2.0
google-api-core==1.1.0
google-auth==1.4.1
google-auth-oauthlib==0.2.0
google-cloud==0.32.0
google-cloud-bigquery==0.31.0
google-cloud-bigquery-datatransfer==0.1.1
google-cloud-bigtable==0.28.1
google-cloud-container==0.1.1
google-cloud-core==0.28.1
google-cloud-datastore==1.4.0
google-cloud-dns==0.28.0
google-cloud-error-reporting==0.28.0
google-cloud-firestore==0.28.0
google-cloud-language==1.0.1
google-cloud-logging==1.4.0
google-cloud-monitoring==0.28.1
google-cloud-pubsub==0.30.1
google-cloud-resource-manager==0.28.1
google-cloud-runtimeconfig==0.28.1
google-cloud-spanner==0.29.0
google-cloud-speech==0.30.0
google-cloud-storage==1.6.0
google-cloud-trace==0.17.0
google-cloud-translate==1.3.1
google-cloud-videointelligence==1.0.1
google-cloud-vision==0.29.0
google-gax==0.15.16
google-resumable-media==0.3.1
googleapis-common-protos==1.5.3
grpc-google-iam-v1==0.11.4
grpcio==1.53.2
html5lib==0.999999999
httplib2==0.19.0
idna==3.7
Markdown==2.6.11
mock==2.0.0
numpy==1.22.0
oauth2client==3.0.0
oauthlib==2.0.7
pandas==0.22.0
pandas-gbq==0.3.1
pbr==4.0.0
ply==3.8
proto-google-cloud-datastore-v1==0.90.4
proto-google-cloud-error-reporting-v1beta1==0.15.3
proto-google-cloud-logging-v2==0.91.3
protobuf==3.18.3
psutil==5.6.6
pyasn1==0.4.2
pyasn1-modules==0.2.1
python-dateutil==2.7.2
pytz==2018.3
requests==2.32.0
requests-oauthlib==0.8.0
rsa==4.7
six==1.11.0
tensorboard==1.12.0
tensorflow==2.12.1
termcolor==1.1.0
urllib3==1.26.18
Werkzeug==3.0.3


================================================
FILE: annotator_models/results/.gitignore
================================================
*
!.gitignore


================================================
FILE: annotator_models/trainer/__init__.py
================================================


================================================
FILE: annotator_models/trainer/dawid_skene.py
================================================
"""Description: Given unreliable ratings of item classes by multiple raters, determine the most likely true class for each item, class marginals, and individual error rates for each rater, using Expectation Maximization.

References:
Dawid and Skene (1979). Maximum Likelihood Estimation of Observer
Error-Rates Using the EM Algorithm. Journal of the Royal Statistical Society.
Series C (Applied Statistics), Vol. 28, No. 1, pp. 20-28.
"""

import argparse
import logging
import math
import sys
import time

import numpy as np
import pandas as pd
from scipy import stats
import tensorflow as tf


FLAGS = None
np.set_printoptions(precision=2)


def run(items,
        raters,
        classes,
        counts,
        label,
        psuedo_count,
        tol=1,
        max_iter=25,
        init='average'):
  """
    Run the Dawid-Skene estimator on a tensor of rating counts.

    Starting from the estimate returned by initialize(), alternates an M-step
    (m_step) and an E-step (e_step_verbose) until both the class marginals and
    the item class estimates change by less than `tol`, or `max_iter`
    iterations have been run.

    Input:
      items, raters, classes, label: accepted for interface compatibility;
          they are not read by this function.
      counts: counts of the number of times each response was given by each
          rater for each item: [items x raters x classes]
      psuedo_count: smoothing count forwarded to m_step
      tol: tolerance required for convergence of EM; compared against the
          summed absolute change of the class marginals and of the item
          class estimates
      max_iter: maximum number of iterations of EM. At least one iteration is
          always run so that there are estimates to return.
      init: currently ignored; the initial estimate always comes from
          initialize()

    Returns:
      (class_marginals, error_rates, item_classes) from the last iteration
    """

  iteration = 0
  old_class_marginals = None

  # item_classes is a matrix of estimates of true item classes of size
  # [items, classes]
  item_classes = initialize(counts)

  logging.info('Iter\tlog-likelihood\tdelta-CM\tdelta-Y_hat')

  while True:
    iteration += 1
    start_iter = time.time()

    # M-step - updated error rates and class marginals given new
    #          distribution over true item classes
    old_item_classes = item_classes
    (class_marginals, error_rates) = m_step(counts, item_classes, psuedo_count)

    # E-step - calculate expected item classes given error rates and
    #          class marginals
    item_classes = e_step_verbose(counts, class_marginals, error_rates)

    # check likelihood
    log_L = calc_likelihood(counts, class_marginals, error_rates)

    # calculate the number of seconds the last iteration took
    iter_time = time.time() - start_iter

    # Convergence can only be judged once there is a previous iteration to
    # compare against.
    converged = False
    if old_class_marginals is not None:
      class_marginals_diff = np.sum(
          np.abs(class_marginals - old_class_marginals))
      item_class_diff = np.sum(np.abs(item_classes - old_item_classes))

      logging.info('{0}\t{1:.1f}\t{2:.4f}\t\t{3:.2f}\t({4:3.2f} secs)'.format(
          iteration, log_L, class_marginals_diff, item_class_diff, iter_time))

      converged = class_marginals_diff < tol and item_class_diff < tol
    else:
      logging.info('{0}\t{1:.1f}'.format(iteration, log_L))

    # The iteration cap is checked on every pass (including the first) and
    # with >=, so that no more than max_iter iterations are ever executed.
    if converged or iteration >= max_iter:
      break

    # update current values
    old_class_marginals = class_marginals

  return class_marginals, error_rates, item_classes


def load_data(path, unit_id, worker_id, label):
  """
    Load the annotation CSV and keep only the columns used by the estimator.

    Input:
      path: location of the CSV file, opened through TensorFlow's file API
      unit_id: name of the column identifying the rated item
      worker_id: name of the column identifying the rater
      label: name of the column holding the rating

    Returns:
      A DataFrame with exactly the columns [unit_id, worker_id, label]
    """
  logging.info('Loading data from {0}'.format(path))

  # `tf.gfile` is not available in the TensorFlow 2.x top-level namespace;
  # `tf.io.gfile.GFile` is the equivalent file object.
  with tf.io.gfile.GFile(path, 'rb') as fileobj:
    df = pd.read_csv(fileobj, encoding='utf-8')

  # only keep necessary columns
  return df[[unit_id, worker_id, label]]


def initialize(counts):
  """
    Build the starting estimate of the true item classes from raw counts
    (equation 3.1 in Dawid-Skene (1979)): for every item, the fraction of its
    ratings that fell into each class.

    Input:
      counts: number of times each response was given by each rater for each
          item: [items x raters x classes]

    Returns:
      item_classes: per-item class proportions: [items x classes]
    """
  n_items, _, n_classes = np.shape(counts)

  # votes[i, c]: how many ratings of item i, from any rater, chose class c
  votes = np.sum(counts, axis=1)

  # total number of ratings each item received, kept as a column so that it
  # broadcasts across the class axis
  ratings_per_item = np.sum(votes, axis=1, dtype=float, keepdims=True)

  item_classes = np.zeros([n_items, n_classes])
  item_classes[:, :] = votes / ratings_per_item

  return item_classes


def m_step(counts, item_classes, psuedo_count):
  """
    Vectorized M-step: re-estimate the class marginals (p_j) and the rater
    error rates (pi_kjl) from the current soft class assignments.
    See equations 2.3 and 2.4 in Dawid-Skene (1979)

    Input:
      counts: Array of how many times each rating was given by each rater
        for each item: [items x raters x classes]
      item_classes: Matrix of current assignments of items to classes:
        [items x classes]
      psuedo_count: Smoothing constant added to every (rater, true class,
        response) cell before the error rates are normalized.

    Returns:
      p_j: class marginals [classes]
      pi_kjl: error rates - the probability of rater k giving
          response l for an item in class j [raters, classes, classes]
    """
  n_items, _, _ = np.shape(counts)

  # class marginals: average of the soft assignments over items
  class_marginals = np.sum(item_classes, axis=0) / float(n_items)

  # counts.T is [response, rater, item]; multiplying by item_classes
  # ([item, true class]) sums over items and yields
  # [response, rater, true class].
  weighted = np.matmul(counts.T, item_classes) + psuedo_count

  # move the axes into [rater, true class, response] order
  weighted = np.transpose(weighted, (1, 2, 0))

  # total weight per (rater, true class) over all responses
  totals = np.sum(weighted, axis=2, keepdims=True)

  # a zero total means every cell in that row is zero; dividing by 1 instead
  # keeps the row at 0 rather than producing nan
  totals[totals == 0] = 1

  error_rates = weighted / totals

  return (class_marginals, error_rates)


def m_step_verbose(counts, item_classes, psuedo_count):
  """
    Loop-based (non-vectorized) variant of m_step, kept as a readable
    reference implementation for debugging.

    Re-estimates the class marginals (p_j) and the rater error rates (pi_kjl)
    from the current soft class assignments.
    See equations 2.3 and 2.4 in Dawid-Skene (1979)

    Input:
      counts: Array of how many times each rating was given by each rater
        for each item: [items x raters x classes]
      item_classes: Matrix of current assignments of items to classes:
        [items x classes]
      psuedo_count: Smoothing constant added to every (rater, true class,
        response) cell before the error rates are normalized.

    Returns:
      p_j: class marginals [classes]
      pi_kjl: error rates - the probability of rater k giving
          response l for an item in class j [raters, classes, classes]
    """
  n_items, n_raters, n_classes = np.shape(counts)

  class_marginals = np.sum(item_classes, 0) / float(n_items)

  error_rates = np.zeros([n_raters, n_classes, n_classes])
  for rater in range(n_raters):
    rater_counts = counts[:, rater, :]
    for true_class in range(n_classes):
      membership = item_classes[:, true_class]

      # smoothed, membership-weighted count of each response for this rater
      row = np.array(
          [np.dot(membership, rater_counts[:, response]) + psuedo_count
           for response in range(n_classes)],
          dtype=float)

      # normalize over responses; a non-positive total is left as is
      total = np.sum(row)
      if total > 0:
        row = row / float(total)

      error_rates[rater, true_class, :] = row

  return (class_marginals, error_rates)


def e_step(counts_tiled, class_marginals, error_rates):
  """
    Vectorized E-step: posterior probability of each item belonging to each
    class under the current parameter estimates.
    See equation 2.5 in Dawid-Skene (1979)

    Inputs:
      counts_tiled: the rating counts repeated along an extra class axis so
          they line up with error_rates.
          Size: [nItems, nRaters, nClasses, nClasses]
      class_marginals: probability of a random item belonging to each class.
          Size: [nClasses]
      error_rates: probability of rater k assigning a item in class j
          to class l. Size [nRaters, nClasses, nClasses]

    Returns:
      item_classes: Soft assignments of items to classes
          [items x classes]
    """
  n_items, _, _, _ = np.shape(counts_tiled)

  # one copy of the error rates per item, raised elementwise to the counts
  rates_per_item = np.tile(error_rates, (n_items, 1, 1, 1))
  powered = np.power(rates_per_item, counts_tiled)

  # Two chained single-axis products (raters first, then responses) are
  # substantially faster than a single np.prod over both axes.
  over_raters = np.prod(powered, axis=1)
  likelihood = np.prod(over_raters, axis=2)
  unnormalized = class_marginals * likelihood

  # normalize each item's row so that it sums to one
  row_totals = np.sum(unnormalized, axis=1, keepdims=True)
  return unnormalized / row_totals


def e_step_verbose(counts, class_marginals, error_rates):
  """
    Loop-based (non-vectorized) E-step. This is the variant used by run().

    Computes the posterior probability of each item belonging to each class
    under the current parameter estimates.
    See equation 2.5 in Dawid-Skene (1979)

    Inputs:
      counts: Array of how many times each rating was given
          by each rater for each item: [items x raters x classes]
      class_marginals: probability of a random item belonging to each class
      error_rates: probability of rater k assigning a item in class j
          to class l [raters, classes, classes]

    Returns:
      item_classes: Soft assignments of items to classes
          [items x classes]
    """
  n_items, _, n_classes = np.shape(counts)

  posterior = np.zeros([n_items, n_classes])

  for item in range(n_items):
    ratings = counts[item, :, :]
    for true_class in range(n_classes):
      # likelihood of this item's ratings if `true_class` were the truth:
      # product over raters and responses of rate ** count
      likelihood = np.prod(np.power(error_rates[:, true_class, :], ratings))
      posterior[item, true_class] = class_marginals[true_class] * likelihood

  # normalize each item's row so that it sums to one
  row_totals = np.sum(posterior, axis=1, keepdims=True)
  return posterior / row_totals


def calc_likelihood(counts, class_marginals, error_rates):
  """
    Calculate the log-likelihood given the current parameter estimates
    This should go up monotonically as EM proceeds
    See equation 2.7 in Dawid-Skene (1979)

    Inputs:
      counts: Array of how many times each response was received
          by each rater from each item: [items x raters x classes]
      class_marginals: probability of a random item belonging to each class
      error_rates: probability of rater k assigning a item in class j
          to class l [raters, classes, classes]

    Returns:
      Log-likelihood given current parameter estimates

    Raises:
      SystemExit: with exit status 1 if the running total becomes nan or
          infinite (for example when an item's likelihood is 0).
    """
  [nItems, nRaters, nClasses] = np.shape(counts)
  log_L = 0.0

  for i in range(nItems):
    # marginal likelihood of item i's ratings, summed over the possible
    # true classes
    item_likelihood = 0.0
    for j in range(nClasses):
      class_prior = class_marginals[j]
      item_class_likelihood = np.prod(
          np.power(error_rates[:, j, :], counts[i, :, :]))
      item_likelihood += class_prior * item_class_likelihood

    item_log_likelihood = np.log(item_likelihood)
    temp = log_L + item_log_likelihood

    if np.isnan(temp) or np.isinf(temp):
      # Report item index, running total, this item's term and the new
      # total, then stop with a non-zero status so the failure is visible
      # to whatever launched the job.
      logging.error('{0}, {1}, {2}, {3}'.format(i, log_L, item_log_likelihood,
                                                temp))
      sys.exit(1)

    log_L = temp

  return log_L


def random_initialization(counts):
  """
    Alternative to initialize(): assign every item to a single class drawn at
    random, with each class weighted by the share of the item's ratings it
    received.

    Input:
      counts: counts of the number of times each response was received
          by each rater from each item: [items x raters x classes]

    Returns:
      item_classes: one-hot matrix of initial true-class estimates:
          [items x classes]
    """
  n_items, _, n_classes = np.shape(counts)

  # votes[i, c]: how many ratings of item i, from any rater, chose class c
  votes = np.sum(counts, 1)

  item_classes = np.zeros([n_items, n_classes])
  class_ids = np.arange(n_classes)

  for row, item_votes in enumerate(votes):
    # sampling distribution for this item: its vote proportions
    probabilities = item_votes / np.sum(item_votes, dtype=float)
    chosen = np.random.choice(class_ids, p=probabilities)
    item_classes[row, chosen] = 1

  return item_classes


def majority_voting(counts):
  """
    Alternative to initialize(): assign every item to the class that received
    the most ratings.

    Input:
      counts: Counts of the number of times each response was received
          by each rater from each item: [items x raters x classes]
    Returns:
      item_classes: one-hot matrix of initial true-class estimates:
          [items x classes]
    """
  n_items, _, n_classes = np.shape(counts)

  # votes[i, c]: how many ratings of item i, from any rater, chose class c
  votes = np.sum(counts, 1)

  # argmax returns the first maximum, so ties go to the lowest class index
  # (this could be randomized instead)
  winners = np.argmax(votes, axis=1)

  item_classes = np.zeros([n_items, n_classes])
  item_classes[np.arange(n_items), winners] = 1

  return item_classes


def parse_item_classes(df, label, item_classes, index_to_unit_id_map,
                       index_to_y_map, unit_id, worker_id, comment_text_path):
  """
    Given the original data df, the predicted item_classes, and
    the data mappings, returns a DataFrame with the fields:
      * {unit_id}: the original item ID
      * {label}_mean: the mean of the original ratings for the item
      * {label}_hat_{y}: one column per class, the predicted probability of
               the item having original label value y, as learned from the
               Dawid-Skene algorithm
      * {label}_hat_mean: the expected label value under those probabilities
      * _unit_index: the 0,1,...nItems index
    plus, when comment_text_path is given, the columns of that CSV joined on
    {unit_id}.

    Input:
      df: the annotations, one row per rating; it is not modified
      label: name of the rating column in df
      item_classes: soft class assignments [items x classes]
      index_to_unit_id_map: item index -> original item ID
      index_to_y_map: class index -> original label value
      unit_id: name of the item ID column
      worker_id: unused
      comment_text_path: optional path of a CSV with item-level comment text
    """
  LABEL_HAT = '{}_hat'.format(label)
  LABEL_MEAN = '{}_mean'.format(label)
  ROUND_DEC = 8
  _, n_classes = np.shape(item_classes)

  df_predictions = pd.DataFrame()

  # Add columns for predictions for each class
  col_names = []
  y_values = []
  for k in range(n_classes):
    # y is the original value of the class. When we train, we re-map
    # all the classes to 0,1,....K. But our data has classes like
    # -2,-1,0,1,2. In that case, if k is 0, then y would be -2
    y = index_to_y_map[k]
    col_name = '{0}_{1}'.format(LABEL_HAT, y)
    col_names.append(col_name)
    # Collected in class-index order so that y_values[k] always lines up
    # with col_names[k], whatever order the mapping was built in.
    y_values.append(y)

    df_predictions[col_name] = [round(i[k], ROUND_DEC) for i in item_classes]

  # To get a prediction of the mean label, multiply our predictions with the
  # true y values.
  col_name = '{0}_hat_mean'.format(label)
  df_predictions[col_name] = np.dot(df_predictions[col_names], y_values)

  # Use the _unit_index to map to the original _unit_id
  df_predictions['_unit_index'] = range(len(item_classes))
  df_predictions[unit_id] = df_predictions['_unit_index']\
                               .apply(lambda i: index_to_unit_id_map[i])

  # Calculate the y_mean from the original data and join on _unit_id.
  # The float cast is done on a copy of the two needed columns so that the
  # caller's DataFrame is left untouched.
  ratings = df[[unit_id, label]].astype({label: float})
  mean_labels = ratings.groupby(unit_id, as_index=False)[label]\
                       .mean()\
                       .round(ROUND_DEC)\
                       .rename(columns={label: LABEL_MEAN})
  df_predictions = pd.merge(mean_labels, df_predictions, on=unit_id)

  # join with data that contains the item-level comment text
  if comment_text_path:
    # `tf.gfile` is not available in the TensorFlow 2.x top-level namespace;
    # `tf.io.gfile.GFile` is the equivalent file object.
    with tf.io.gfile.GFile(comment_text_path, 'r') as fileobj:
      logging.info(
          'Loading comment text data from {}'.format(comment_text_path))
      df_comments = pd.read_csv(fileobj)

    # drop duplicate comments
    df_comments = df_comments.drop_duplicates(subset=unit_id)

    df_predictions = df_predictions.merge(df_comments, on=unit_id)
  return df_predictions


def parse_error_rates(df, error_rates, index_to_worker_id_map, index_to_y_map,
                      unit_id, worker_id):
  """
    Given the original data DataFrame, the predicted error_rates and the
    mappings between the indexes and ids, returns a DataFrame with one row
    per worker and the fields:

      * _worker_index: the 0,1,...nRaters index
      * {worker_id}: the original worker ID
      * n_annotations: number of rows in df for that worker
      * accuracy_rate_{y}: for each original label value y, the probability
          the worker would choose y when the true class is y (for accurate
          workers, these numbers are high).

    Input:
      df: the annotations, one row per rating
      error_rates: [raters, classes, classes] array from the M-step
      index_to_worker_id_map: rater index -> original worker ID
      index_to_y_map: class index -> original label value
      unit_id: name of the item ID column (counted to get n_annotations)
      worker_id: name of the worker ID column
    """
  # Materialize the keys once so that both columns are built from the same
  # ordering and pandas receives a plain list.
  worker_indices = list(index_to_worker_id_map.keys())
  df_error_rates = pd.DataFrame(
      {
          '_worker_index': worker_indices,
          worker_id: [index_to_worker_id_map[i] for i in worker_indices],
      },
      columns=['_worker_index', worker_id])

  # add annotation counts for each worker
  worker_counts = df.groupby(
      by=worker_id, as_index=False)[unit_id]\
                    .count()\
                    .rename(columns={unit_id: 'n_annotations'})

  df_error_rates = pd.merge(df_error_rates, worker_counts, on=worker_id)

  # add the diagonal error rates, which are the per-class accuracy rates,
  # for each class k, we add a column for p(rater will pick k | item's true class is k)
  #
  # Rows are looked up by _worker_index rather than by position, so each
  # worker gets its own rates regardless of the order of the mapping or of
  # the rows that survive the merge.
  error_rates = np.asarray(error_rates)
  worker_rows = df_error_rates['_worker_index'].values.astype(int)

  # y_label is the original y value in the data and y_index is the
  # integer we mapped it to, i.e. 0, 1, ..., |Y|
  for y_index, y_label in index_to_y_map.items():
    col_name = 'accuracy_rate_{0}'.format(y_label)
    df_error_rates[col_name] = error_rates[worker_rows, y_index, y_index]

  return df_error_rates


def main(FLAGS):
  """
    Run the Dawid-Skene estimator end to end on a CSV of annotations.

    Loads FLAGS.data_path, keeps the first FLAGS.n_examples annotations,
    builds the [items x raters x classes] count array, runs EM and writes two
    CSV files under FLAGS.job_dir:
      * predictions_{label}_{n}.csv: per-item output of parse_item_classes
      * error_rates_{label}_{n}.csv: per-rater output of parse_error_rates
    where n is the number of annotations actually used.

    Input:
      FLAGS: object exposing the attributes n_examples, label, unit_id_col,
          worker_id_col, comment_text_path, data_path, pseudo_count,
          tolerance, max_iter and job_dir
    """
  # Local import: used only to build the output paths below.
  import posixpath

  logging.basicConfig(level=logging.INFO)

  # load data, each row is an annotation
  n_examples = FLAGS.n_examples
  label = FLAGS.label
  unit_id = FLAGS.unit_id_col
  worker_id = FLAGS.worker_id_col
  comment_text_path = FLAGS.comment_text_path
  df = load_data(FLAGS.data_path, unit_id, worker_id, label)[0:n_examples]

  logging.info('Running on {0} examples for label {1}'.format(len(df), label))

  # convert rater, item and label IDs to integers starting at 0
  #
  #   * worker_id_to_index_map: _worker_id -> index
  #   * index_to_worker_id_map: index -> worker
  #   * unit_id_to_index_map: _unit_id -> index
  #   * index_to_unit_id_map: index -> _unit_id
  #   * y_to_index_map: label -> index
  #   * index_to_y_map: index -> label
  worker_id_to_index_map = {
      w: i for (i, w) in enumerate(df[worker_id].unique())
  }
  index_to_worker_id_map = {i: w for (w, i) in worker_id_to_index_map.items()}
  unit_id_to_index_map = {w: i for (i, w) in enumerate(df[unit_id].unique())}
  index_to_unit_id_map = {i: w for (w, i) in unit_id_to_index_map.items()}
  y_to_index_map = {w: i for (i, w) in enumerate(df[label].unique())}
  index_to_y_map = {i: w for (w, i) in y_to_index_map.items()}

  # per-annotation integer indices of the rater, item and label
  raters = list(df[worker_id].apply(lambda x: worker_id_to_index_map[x]))
  items = list(df[unit_id].apply(lambda x: unit_id_to_index_map[x]))
  y = list(df[label].apply(lambda x: y_to_index_map[x]))

  nClasses = len(df[label].unique())
  nItems = len(df[unit_id].unique())
  nRaters = len(df[worker_id].unique())
  counts = np.zeros([nItems, nRaters, nClasses])

  # convert responses to counts
  for item_index, rater_index, y_index in zip(items, raters, y):
    counts[item_index, rater_index, y_index] += 1

  raters_unique = index_to_worker_id_map.keys()
  items_unique = index_to_unit_id_map.keys()
  classes_unique = index_to_y_map.keys()

  logging.info('num items: {0}'.format(len(items_unique)))
  logging.info('num raters: {0}'.format(len(raters_unique)))
  logging.info('num classes: {0}'.format(len(classes_unique)))

  # run EM
  start = time.time()
  class_marginals, error_rates, item_classes = run(
      items_unique,
      raters_unique,
      classes_unique,
      counts,
      label,
      FLAGS.pseudo_count,
      tol=FLAGS.tolerance,
      max_iter=FLAGS.max_iter)
  end = time.time()
  logging.info('training time: {0:.4f} seconds'.format(end - start))

  # join comment_text, old labels and new labels
  df_predictions = parse_item_classes(df, label, item_classes,
                                      index_to_unit_id_map, index_to_y_map,
                                      unit_id, worker_id, comment_text_path)

  # join rater error_rates
  df_error_rates = parse_error_rates(df, error_rates, index_to_worker_id_map,
                                     index_to_y_map, unit_id, worker_id)

  # write predictions and error_rates out as CSV
  #
  # posixpath.join always uses '/' and, unlike plain string formatting, does
  # not turn an empty job_dir into a path at the filesystem root.
  n = len(df)
  job_dir = FLAGS.job_dir or ''
  prediction_path = posixpath.join(
      job_dir, 'predictions_{0}_{1}.csv'.format(label, n))
  error_rates_path = posixpath.join(
      job_dir, 'error_rates_{0}_{1}.csv'.format(label, n))

  # `tf.gfile` is not available in the TensorFlow 2.x top-level namespace;
  # `tf.io.gfile.GFile` is the equivalent file object.
  logging.info('Writing predictions to {}'.format(prediction_path))
  with tf.io.gfile.GFile(prediction_path, 'w') as fileobj:
    df_predictions.to_csv(fileobj, index=False, encoding='utf-8')

  logging.info('Writing error rates to {}'.format(error_rates_path))
  with tf.io.gfile.GFile(error_rates_path, 'w') as fileobj:
    df_error_rates.to_csv(fileobj, index=False, encoding='utf-8')


if __name__ == '__main__':
  # Command-line entry point: parse the flags and hand them to main().
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--data-path',
      help='The path to data to run on, local or in Cloud Storage.')
  parser.add_argument(
      '--comment-text-path',
      help='The path to comment text, local or in  Cloud Storage.')
  parser.add_argument(
      '--worker-id-col', help='Column name of worker id.', default='_worker_id')
  parser.add_argument(
      '--unit-id-col', help='Column name of unit id.', default='_comment_id')
  # The underscore spelling is kept for existing callers; the hyphenated
  # alias matches the style of every other flag. Both store to `n_examples`.
  parser.add_argument(
      '--n_examples',
      '--n-examples',
      help='The number of annotations to use.',
      default=10000000,
      type=int)
  parser.add_argument(
      '--label',
      help='The label to train on, e.g. "obscene" or "threat"',
      default='obscene')
  parser.add_argument(
      '--job-dir',
      type=str,
      default='',
      help='The directory where the job is staged.')
  parser.add_argument(
      '--max-iter',
      help='The max number of iteration to run.',
      type=int,
      default=25)
  parser.add_argument(
      '--pseudo-count',
      help='The pseudo count to smooth error rates.',
      type=float,
      default=1.0)
  # Parsed as a float: the tolerance is compared against summed absolute
  # changes of probabilities, so fractional thresholds must be accepted.
  parser.add_argument(
      '--tolerance',
      help='Stop training when variables change less than this value.',
      type=float,
      default=1.0)

  FLAGS = parser.parse_args()

  print('FLAGS', FLAGS)

  main(FLAGS)


================================================
FILE: annotator_models/trainer/dawid_skene_test.py
================================================
"""Tests for dawid_skene."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os
import pandas as pd
import tempfile
import unittest

import dawid_skene

class DawidSkeneTest(unittest.TestCase):
  """End-to-end test of dawid_skene.main on the example from the paper."""

  # The contents of Maximum Likelihood Estimation of Observer Error-Rates
  # Using the EM Algorithm Table 1.
  def setUp(self):
    self.table_1 = pd.DataFrame.from_dict({
        'patient':
            range(1, 46),
        11: [
            1, 3, 1, 2, 2, 2, 1, 3, 2, 2, 4, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 2, 4, 2, 2, 3, 1, 1, 1, 2, 1, 2
        ],
        12: [
            1, 3, 1, 2, 2, 2, 2, 3, 2, 3, 4, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 1, 1, 3, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2, 3, 3, 1, 1, 2, 3, 2, 2
        ],
        13: [
            1, 3, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            1, 1, 1, 2, 1, 1, 2, 1, 3, 1, 2, 2, 3, 1, 2, 3, 1, 1, 1, 2, 1, 2
        ],
        2: [
            1, 4, 2, 3, 3, 3, 2, 3, 2, 2, 4, 3, 1, 3, 1, 2, 1, 1, 2, 1, 2, 2, 3,
            2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 3, 4, 2, 3, 3, 1, 1, 2, 2, 1, 2
        ],
        3: [
            1, 3, 1, 1, 2, 3, 1, 4, 2, 2, 4, 3, 1, 2, 1, 1, 1, 1, 2, 3, 2, 2, 2,
            2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 2, 2, 4, 1, 1, 1, 2, 1, 2
        ],
        4: [
            1, 3, 2, 2, 2, 2, 1, 3, 2, 2, 4, 4, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 1, 1, 2, 1, 1, 2, 1, 3, 1, 2, 3, 4, 3, 3, 3, 1, 1, 1, 2, 1, 2
        ],
        5: [
            1, 4, 2, 1, 2, 2, 1, 3, 3, 3, 4, 3, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2,
            2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2, 3, 2, 1, 1, 1, 2, 1, 2
        ]
    })

  def test_paper_example(self):
    # Everything, including the input CSV, lives in one temporary directory
    # so that it is cleaned up even when main() raises.
    with tempfile.TemporaryDirectory() as tempdirname:
      # One row per (patient, observer column) rating.
      data = self.table_1.set_index('patient') \
                         .stack() \
                         .rename_axis(['patient', 'observer']) \
                         .to_frame('label') \
                         .reset_index()
      # Columns 11, 12 and 13 are three ratings by observer 1.
      data['observer'] = data['observer'].map(
          {11: 1, 12: 1, 13: 1, 2: 2, 3: 3, 4: 4, 5: 5})
      data_path = os.path.join(tempdirname, 'ratings.csv')
      data.to_csv(data_path, header=True, index=False)

      Flags = collections.namedtuple('Flags', [
          'n_examples', 'label', 'unit_id_col', 'worker_id_col',
          'comment_text_path', 'data_path', 'pseudo_count', 'tolerance',
          'max_iter', 'job_dir'
      ])
      flags = Flags(
          n_examples=350,
          label='label',
          unit_id_col='patient',
          worker_id_col='observer',
          comment_text_path=None,
          data_path=data_path,
          pseudo_count=1.0,
          tolerance=1,
          max_iter=25,
          job_dir=tempdirname)
      dawid_skene.main(flags)

      # main() names its outputs after the number of annotations it used.
      n_used = min(len(data), flags.n_examples)
      predictions = pd.read_csv(
          os.path.join(tempdirname, 'predictions_label_{0}.csv'.format(n_used)))
      error_rates = pd.read_csv(
          os.path.join(tempdirname, 'error_rates_label_{0}.csv'.format(n_used)))

    # one prediction row per patient, one error-rate row per observer
    self.assertEqual(len(predictions), 45)
    self.assertEqual(len(error_rates), 5)

    # the per-class probabilities of every patient form a distribution
    hat_columns = [
        c for c in predictions.columns
        if c.startswith('label_hat_') and c != 'label_hat_mean'
    ]
    self.assertEqual(len(hat_columns), 4)
    for total in predictions[hat_columns].sum(axis=1):
      self.assertAlmostEqual(total, 1.0, places=6)


if __name__ == '__main__':
  unittest.main()


================================================
FILE: attention-tutorial/Attention_Model_Tutorial.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "szO16q_1vXOT"
   },
   "source": [
    "# Attention Based Classification Tutorial\n",
    "\n",
    "**Recommended time: 30 minutes**\n",
    "\n",
    "**Contributors: nthain, martin-gorner**\n",
    "\n",
    "\n",
    "This tutorial provides an introduction to building text classification models in tensorflow that use attention to provide insight into how classification decisions are being made. We will build our tensorflow graph following the Embed - Encode - Attend - Predict paradigm introduced by Matthew Honnibal. For more information about this approach, you can refer to:\n",
    "\n",
    "Slides: https://goo.gl/BYT7au\n",
    "\n",
    "Video: https://youtu.be/pzOzmxCR37I\n",
    "\n",
    "\n",
    "Figure 1 below provides a representation of the full tensorflow graph we will build in this tutorial. The green squares represent RNN cells and the blue trapezoids represent neural networks for computing attention weights which will be discussed in more detail below. We will implement each piece of this model graph in a separate function. The whole model will then simply be calling all of these functions in turn. \n",
    "\n",
    "\n",
    "![Figure 1](img/entire_model.png \"Figure 1\")\n",
    "\n",
    "This tutorial was created in collaboration with the Tensorflow without a PhD series. To check out more episodes, tutorials, and codelabs from this series, please visit: \n",
    "\n",
    "https://github.com/GoogleCloudPlatform/tensorflow-without-a-phd\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "tROhMjW49Dsr"
   },
   "source": [
    "### Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "vSgQlcQqbWyb"
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import time\n",
    "import os\n",
    "from sklearn import metrics\n",
    "from visualize_attention import attentionDisplay\n",
    "from process_figshare import download_figshare, process_figshare\n",
    "\n",
    "tf.set_random_seed(1234)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "KKwX66FG9G-L"
   },
   "source": [
    "## Load & Explore Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "4YFtwZsD4J7r"
   },
   "source": [
    "Let's begin by downloading the data from [Figshare](https://figshare.com/articles/Wikipedia_Talk_Labels_Toxicity/4563973) and cleaning and splitting it for use in training."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "download_figshare()\n",
    "process_figshare()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We then load these splits as pandas dataframes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "aIy4ggIxbWyg"
   },
   "outputs": [],
   "source": [
    "SPLITS = ['train', 'dev', 'test']\n",
    "\n",
    "wiki = {}\n",
    "for split in SPLITS:\n",
    "    wiki[split] = pd.read_csv('data/wiki_%s.csv' % split)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "_eZEM1wd5FiA"
   },
   "source": [
    "We display the top few rows of the dataframe to see what we're dealing with. The key columns are 'comment' which contains the text of a comment from a Wikipedia talk page and 'toxicity' which contains the fraction of annotators who found this comment to be toxic. More information about the other fields and how this data was collected can be found on [this wiki](https://meta.wikimedia.org/wiki/Research:Detox/Data_Release) and [research paper](https://arxiv.org/abs/1610.08914).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 195,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 334,
     "status": "ok",
     "timestamp": 1519755503377,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "6sj_aimNbWyn",
    "outputId": "36fccb7e-60a3-4d1c-bbfa-03483ff49f84"
   },
   "outputs": [],
   "source": [
    "wiki['train'].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "p0cz2kA_9JxK"
   },
   "source": [
    "### Hyperparameters"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hyperparameters are used to specify various aspects of our model's architecture. In practice, these are often critical to model performance and are carefully tuned using some type of [hyperparameter search](https://en.wikipedia.org/wiki/Hyperparameter_optimization). For this tutorial, we will choose a reasonable set of hyperparameters and treat them as fixed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "JSvJ3wwwbWys"
   },
   "outputs": [],
   "source": [
    "hparams = {'max_document_length': 60,\n",
    "           'embedding_size': 50,\n",
    "           'rnn_cell_size': 128,\n",
    "           'batch_size': 256,\n",
    "           'attention_size': 32,\n",
    "           'attention_depth': 2}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "owTqZg2ebWyv"
   },
   "outputs": [],
   "source": [
    "MAX_LABEL = 2\n",
    "WORDS_FEATURE = 'words'\n",
    "NUM_STEPS = 300"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 0: Text Preprocessing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before we can build a neural network on comment strings, we first have to complete a number of preprocessing steps. In particular, it is important that we \"tokenize\" the string, splitting it into an array of tokens. In our case, each token will be a word in our sentence and they will be separated by spaces and punctuation. Many alternative tokenizers exist, some of which use characters as tokens, and others which include punctuation, emojis, or even cleverly handle misspellings. \n",
    "\n",
    "Once we've tokenized the sentences, each word will be replaced with an integer representative. This will make the embedding (Step 1) much easier. \n",
    "\n",
    "Happily the tensorflow function [VocabularyProcessor](http://tflearn.org/data_utils/#vocabulary-processor) takes care of both the tokenization and integer mapping. We only have to give it the max_document_length argument which will determine the length of the output arrays. If sentences are shorter than this length, they will be padded and if they are longer, they will be trimmed. The VocabularyProcessor is then trained on the training set to build the initial vocabulary and map the words to integers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "9kcrgebgbWzB"
   },
   "outputs": [],
   "source": [
    "# Initialize the vocabulary processor\n",
    "vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(hparams['max_document_length'])\n",
    "\n",
    "def process_inputs(vocab_processor, df, train_label = 'train', test_label = 'test'):\n",
    "    \n",
    "    # For simplicity, we call our features x and our outputs y\n",
    "    x_train = df['train'].comment\n",
    "    y_train = df['train'].is_toxic\n",
    "    x_test = df['test'].comment\n",
    "    y_test = df['test'].is_toxic\n",
    "\n",
    "    # Train the vocab_processor from the training set\n",
    "    x_train = vocab_processor.fit_transform(x_train)\n",
    "    # Transform our test set with the vocabulary processor\n",
    "    x_test = vocab_processor.transform(x_test)\n",
    "\n",
    "    # We need these to be np.arrays instead of generators\n",
    "    x_train = np.array(list(x_train))\n",
    "    x_test = np.array(list(x_test))\n",
    "    y_train = np.array(y_train).astype(int)\n",
    "    y_test = np.array(y_test).astype(int)\n",
    "\n",
    "    n_words = len(vocab_processor.vocabulary_)\n",
    "    print('Total words: %d' % n_words)\n",
    "\n",
    "    # Return the transformed data and the number of words\n",
    "    return x_train, y_train, x_test, y_test, n_words\n",
    "\n",
    "x_train, y_train, x_test, y_test, n_words = process_inputs(vocab_processor, wiki)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "1KtFmLmp9M0t"
   },
   "source": [
    "### Step 1: Embed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "AjtQe9eT9v4v"
   },
   "source": [
    "Neural networks at their core are a composition of operators from linear algebra and non-linear activation functions. In order to perform these computations on our input sentences, we must first embed them as a vector of numbers. There are two main approaches to perform this embedding:\n",
    "\n",
    "\n",
    "1.   **Pre-trained:** It is often beneficial to initialize our embedding matrix using pre-trained embeddings like [Word2Vec](https://en.wikipedia.org/wiki/Word2vec) or [GloVe](https://nlp.stanford.edu/projects/glove/). These embeddings are trained on a huge corpus of text with a general purpose problem so that they incorporate syntactic and semantic properties of the words being embedded and are amenable to transfer learning on new problems. Once initialized, you can optionally train them further for your specific problem by allowing the embedding matrix in the graph to be a trainable variable in our tensorflow graph. \n",
    "2.   **Random:** Alternatively, embeddings can be \"trained from scratch\" by initializing the embedding matrix randomly and then training it like any other parameter in the tensorflow graph.\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "rCubiw6eUVQm"
   },
   "source": [
    "In this notebook, we will be using a random initialization. To perform this embedding we use the embed_sequence function from the layers package. This will take our input features, which are the arrays of integers we produced in Step 0, and will randomly initialize a matrix to embed them into. The parameters of this matrix will then be trained with the rest of the graph."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "UG1UXX4L_KQk"
   },
   "outputs": [],
   "source": [
    "def embed(features):\n",
    "    word_vectors = tf.contrib.layers.embed_sequence(\n",
    "        features[WORDS_FEATURE], \n",
    "        vocab_size=n_words, \n",
    "        embed_dim=hparams['embedding_size'])\n",
    "    \n",
    "    return word_vectors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "nBp5uc-tSee2"
   },
   "source": [
    "### Step 2: Encode"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "9vjxtIroTBUq"
   },
   "source": [
    "A [recurrent neural network](https://en.wikipedia.org/wiki/Recurrent_neural_network) is a deep learning architecture that is useful for encoding sequential information like sentences. They are built around a single cell which contains one of several standard neural network architectures (e.g. simple [RNN](https://en.wikipedia.org/wiki/Recurrent_neural_network), [GRU](https://en.wikipedia.org/wiki/Gated_recurrent_unit), or [LSTM](https://en.wikipedia.org/wiki/Long_short-term_memory)). We will not focus on the details of the architectures, but at each point in time the cell takes in two inputs and produces two outputs. The inputs are the input token for that step in the sequence and some state from the previous steps in the sequence. The outputs produced are the encoded vectors for the current sequence step and a state to pass on to the next step of the sequence. \n",
    "\n",
    "Figure 2 shows what this looks like for an unrolled RNN. Each cell (represented by a green square) has two input arrows and two output arrows. Note that all of the green squares represent the same cell and share parameters. One major advantage of this cell replication is that, at inference time, it allows us to deal with arbitrary length input and not be restricted by the input sizes of our training set.\n",
    "\n",
    "![Figure 2](img/figure_2_v0.png \"Figure 2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For our model, we will use a bi-directional RNN. This is simply the concatenation of two RNNs, one which processes the sequence from left to right (the \"forward\" RNN) and one which processes it from right to left (the \"backward\" RNN). By using both directions, we get a stronger encoding as each word can be encoded using the context of its neighbors on both sides rather than just a single side. For our cells, we use [gated recurrent units (GRUs)](https://en.wikipedia.org/wiki/Gated_recurrent_unit). Figure 3 gives a visual representation of this.\n",
    "\n",
    "![Figure 3](img/figure_3.png \"Figure 3\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "DBDS9LjdUZbV"
   },
   "outputs": [],
   "source": [
    "def encode(word_vectors):\n",
    "    # Create a Gated Recurrent Unit cell with hidden size of RNN_SIZE.\n",
    "    # Since the forward and backward RNNs will have different parameters, we instantiate two seperate GRUS.\n",
    "    rnn_fw_cell = tf.contrib.rnn.GRUCell(hparams['rnn_cell_size'])\n",
    "    rnn_bw_cell = tf.contrib.rnn.GRUCell(hparams['rnn_cell_size'])\n",
    "    \n",
    "    # Create an unrolled Bi-Directional Recurrent Neural Networks to length of\n",
    "    # max_document_length and passes word_list as inputs for each unit.\n",
    "    outputs, _ = tf.nn.bidirectional_dynamic_rnn(rnn_fw_cell, \n",
    "                                                 rnn_bw_cell, \n",
    "                                                 word_vectors, \n",
    "                                                 dtype=tf.float32, \n",
    "                                                 time_major=False)\n",
    "    \n",
    "    return outputs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "V8hbwTb7dXLV"
   },
   "source": [
    "### Step 3: Attend"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "PMKkWgSwdZSq"
   },
   "source": [
    "There are a number of ways to use the encoded states of a recurrent neural network for prediction. One traditional approach is to simply use the final encoded state of the network, as seen in Figure 2. However, this could lose some useful information encoded in the previous steps of the sequence. In order to keep that information, one could instead use an average of the encoded states outputted by the RNN. There is no reason to believe, though, that all of the encoded states of the RNN are equally valuable. Thus, we arrive at the idea of using a weighted sum of these encoded states to make our prediction.\n",
    "\n",
    "We will call the weights of this weighted sum \"attention weights\" as we will see below that they correspond to how important our model thinks each token of the sequence is in making a prediction decision. We compute these attention weights simply by building a small fully connected neural network on top of each encoded state. This network will have a single unit final layer which will correspond to the attention weight we will assign. As for RNNs, the parameters of this network will be the same for each step of the sequence, allowing us to accommodate variable length inputs. Figure 4 shows us what the graph would look like if we applied attention to a uni-directional RNN.\n",
    "\n",
    "![Figure 4](img/figure_4.png \"Figure 4\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Again, as our model uses a bi-directional RNN, we first concatenate the hidden states from each RNN before computing the attention weights and applying the weighted sum. Figure 5 below visualizes this step. \n",
    "\n",
    "![Figure 5](img/figure_5.png \"Figure 5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "3a9fkmUOdeHh"
   },
   "outputs": [],
   "source": [
    "def attend(inputs, attention_size, attention_depth):\n",
    "  \n",
    "  inputs = tf.concat(inputs, axis = 2)\n",
    "  \n",
    "  inputs_shape = inputs.shape\n",
    "  sequence_length = inputs_shape[1].value\n",
    "  final_layer_size = inputs_shape[2].value\n",
    "  \n",
    "  x = tf.reshape(inputs, [-1, final_layer_size])\n",
    "  for _ in range(attention_depth-1):\n",
    "    x = tf.layers.dense(x, attention_size, activation = tf.nn.relu)\n",
    "  x = tf.layers.dense(x, 1, activation = None)\n",
    "  logits = tf.reshape(x, [-1, sequence_length, 1])\n",
    "  alphas = tf.nn.softmax(logits, dim = 1)\n",
    "  \n",
    "  output = tf.reduce_sum(inputs * alphas, 1)\n",
    "\n",
    "  return output, alphas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "bqtYZzWeoz55"
   },
   "source": [
    "### Step 4: Predict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To generate a class prediction about whether a comment is toxic or not, the final part of our tensorflow graph takes the weighted average of hidden states generated in the attention step and uses a fully connected layer with a softmax activation function to generate probability scores for each of our prediction classes. While training, the model will use the cross-entropy loss function to train its parameters. \n",
    "\n",
    "As we will use the [estimator framework](https://www.tensorflow.org/get_started/custom_estimators) to train our model, we write an estimator_spec function to specify how our model is trained and what values to return during the prediction stage. We also specify the evaluation metrics of accuracy and auc, which we will use to evaluate our model in Step 7."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "L6_Wo4ixbWzI"
   },
   "outputs": [],
   "source": [
    "def estimator_spec_for_softmax_classification(\n",
    "    logits, labels, mode, alphas):\n",
    "  \"\"\"Returns EstimatorSpec instance for softmax classification.\"\"\"\n",
    "  predicted_classes = tf.argmax(logits, 1)\n",
    "  if mode == tf.estimator.ModeKeys.PREDICT:\n",
    "    return tf.estimator.EstimatorSpec(\n",
    "        mode=mode,\n",
    "        predictions={\n",
    "            'class': predicted_classes,\n",
    "            'prob': tf.nn.softmax(logits),\n",
    "            'attention': alphas\n",
    "        })\n",
    "\n",
    "  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)\n",
    "  loss = tf.losses.softmax_cross_entropy(\n",
    "      onehot_labels=onehot_labels, logits=logits)\n",
    "  if mode == tf.estimator.ModeKeys.TRAIN:\n",
    "    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n",
    "    train_op = optimizer.minimize(loss, \n",
    "                                  global_step=tf.train.get_global_step())\n",
    "    return tf.estimator.EstimatorSpec(mode, \n",
    "                                      loss=loss, \n",
    "                                      train_op=train_op)\n",
    "\n",
    "  eval_metric_ops = {\n",
    "      'accuracy': tf.metrics.accuracy(\n",
    "          labels=labels, predictions=predicted_classes),\n",
    "      'auc': tf.metrics.auc(\n",
    "          labels=labels, predictions=predicted_classes),    \n",
    "  }\n",
    "  return tf.estimator.EstimatorSpec(\n",
    "      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The predict component of our graph then just takes the output of our attention step, i.e. the weighted average of the bi-RNN hidden layers, and adds one more fully connected layer to compute the logits. These logits are fed into our estimator_spec which uses a softmax to get the final class probabilities and a [softmax_cross_entropy](https://www.tensorflow.org/api_docs/python/tf/losses/softmax_cross_entropy) to build a loss function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(encoding, labels, mode, alphas):\n",
    "    logits = tf.layers.dense(encoding, MAX_LABEL, activation=None)\n",
    "    return estimator_spec_for_softmax_classification(\n",
    "          logits=logits, labels=labels, mode=mode, alphas=alphas)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "0URRXudn9Qlg"
   },
   "source": [
    "### Step 5: Complete Model Architecture"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "cdb9C4jNbCBj"
   },
   "source": [
    "We are now ready to put it all together. As you can see from the bi_rnn_model function below, once you have the components for embed, encode, attend, and predict, putting the whole graph together is extremely simple!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "FcxSFa5vbWzR"
   },
   "outputs": [],
   "source": [
    "def bi_rnn_model(features, labels, mode):\n",
    "  \"\"\"RNN model to predict from sequence of words to a class.\"\"\"\n",
    "\n",
    "  word_vectors = embed(features)\n",
    "  outputs = encode(word_vectors)\n",
    "  encoding, alphas = attend(outputs, \n",
    "                            hparams['attention_size'], \n",
    "                            hparams['attention_depth'])\n",
    "\n",
    "  return predict(encoding, labels, mode, alphas)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![Figure 1](img/entire_model.png \"Figure 1\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "9jZqVeWx9TVT"
   },
   "source": [
    "### Step 6: Train Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will use the estimator framework to train our model. To define our classifier, we just provide it with the complete model graph (i.e. the bi_rnn_model function) and a directory where the models will be saved."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "HFDYpImJbWzT"
   },
   "outputs": [],
   "source": [
    "current_time = str(int(time.time()))\n",
    "model_dir = os.path.join('checkpoints', current_time)\n",
    "classifier = tf.estimator.Estimator(model_fn=bi_rnn_model, \n",
    "                                    model_dir=model_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The estimator framework also requires us to define an input function. This will take the input data and provide it during model training in batches. We will use the provided numpy_input_fn, which takes numpy arrays as features and labels. We also specify the batch size and whether we want to shuffle the data between epochs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 34,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 153379,
     "status": "ok",
     "timestamp": 1519758352944,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "gXJdQHe-bWzX",
    "outputId": "353cbe80-0e36-4832-ed8e-5e6d31087ca1"
   },
   "outputs": [],
   "source": [
    "# Train.\n",
    "train_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "  x={WORDS_FEATURE: x_train},\n",
    "  y=y_train,\n",
    "  batch_size=hparams['batch_size'],\n",
    "  num_epochs=None,\n",
    "  shuffle=True)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now, it's finally time to train our model! With estimator, this is as easy as calling the train function and specifying how long we'd like to train for."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "classifier.train(input_fn=train_input_fn, \n",
    "                 steps=NUM_STEPS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "wJQI2zW19V8j"
   },
   "source": [
    "### Step 7: Predict and Evaluate Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To evaluate the model, we will use it to predict the values of examples from our test set. Again, we define a numpy_input_fn, for the test data in this case, and then have the classifier run predictions on this input function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "4E5poMgPbWza"
   },
   "outputs": [],
   "source": [
    "# Predict.\n",
    "test_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "  x={WORDS_FEATURE: x_test},\n",
    "  y=y_test,\n",
    "  num_epochs=1,\n",
    "  shuffle=False)\n",
    "\n",
    "predictions = classifier.predict(input_fn=test_input_fn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "These predictions are returned to us as a generator. The code below gives an example of how we can extract the class and attention weights for each prediction."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "oTL7trjX00Zp"
   },
   "outputs": [],
   "source": [
    "y_predicted = []\n",
    "alphas_predicted = []\n",
    "for p in predictions:\n",
    "    y_predicted.append(p['class'])\n",
    "    alphas_predicted.append(p['attention'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To evaluate our model, we can use the evaluate function provided by estimator to get the [accuracy](https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers) and [ROC-AUC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) scores as we defined them in our estimator_spec."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 34,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 17936,
     "status": "ok",
     "timestamp": 1519758410784,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "jpgentt6bWzf",
    "outputId": "ae6de3cc-9eb5-469a-e04e-958a784e9dee"
   },
   "outputs": [],
   "source": [
    "scores = classifier.evaluate(input_fn=test_input_fn)\n",
    "print('Accuracy: {0:f}'.format(scores['accuracy']))\n",
    "print('AUC: {0:f}'.format(scores['auc']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "lOmmwP6UV8h7"
   },
   "source": [
    "### Step 8: Display Attention"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have a trained attention-based toxicity model, let's use it to visualize how our model makes its classification decisions. We use the helpful attentionDisplay class from the visualize_attention package. Given any sentence, this class uses our trained classifier to determine whether the sentence is toxic and also returns a representation of the attention weights. In the arrays below, the more red a word is, the more weight the classifier puts on that encoded word. Try it out on some sentences of your own and see what patterns you can find!\n",
    "\n",
    "Note: If you are viewing this on Github, the colors in the cells won't display properly. We recommend viewing it locally or with [nbviewer](https://nbviewer.jupyter.org/) to see the correct rendering of the attention weights."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display = attentionDisplay(vocab_processor, classifier)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1096,
     "status": "ok",
     "timestamp": 1519758417492,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "xSpv2plUV4mN",
    "outputId": "952a6fc6-bac4-46ab-c354-c54e5d288d75"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"Fuck off, you idiot.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1024,
     "status": "ok",
     "timestamp": 1519758419192,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "m9bsno-UV4o0",
    "outputId": "beb38261-3e4e-4348-e62f-d23bac629268"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"Thanks for your help editing this.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1223,
     "status": "ok",
     "timestamp": 1519758421016,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "nB4G8rriV4wt",
    "outputId": "2b540ca1-a03d-475a-a54a-6c22558e0be3"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"You're such an asshole. But thanks anyway.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1067,
     "status": "ok",
     "timestamp": 1519758422814,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "2L3TNl-NV4zV",
    "outputId": "d58ba84a-c30f-4ddb-ecb5-3fc36a850bd5"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"I'm going to shoot you!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1383,
     "status": "ok",
     "timestamp": 1519758424819,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "r5BKahjfV41o",
    "outputId": "05b91277-4d0a-4627-8cb9-c2275a799927"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"Oh shoot. Well alright.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1154,
     "status": "ok",
     "timestamp": 1519758426592,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "8GicGWbCV4uz",
    "outputId": "f02500eb-35a9-466a-a759-8b83fb05feb3"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"First of all who the fuck died and made you the god.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1061,
     "status": "ok",
     "timestamp": 1519758428491,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "kWIR-ivlWi18",
    "outputId": "fb25ede3-e321-4abb-e358-3a0be35266fa"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"Gosh darn it!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1400,
     "status": "ok",
     "timestamp": 1519758433415,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "MJhqEbl8WlJm",
    "outputId": "acf96708-f04a-4493-a650-70ff8f6aa2a7"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"God damn it!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "height": 95,
     "output_extras": [
      {
       "item_id": 1
      },
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1400,
     "status": "ok",
     "timestamp": 1519758437722,
     "user": {
      "displayName": "Nithum Thain",
      "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg",
      "userId": "105288052437331023238"
     },
     "user_tz": 210
    },
    "id": "BDWSuL3kZCT1",
    "outputId": "795856d9-ab5d-48aa-ceb2-46a654eec60b"
   },
   "outputs": [],
   "source": [
    "display.display_prediction_attention(\"You're not that smart are you?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "default_view": {},
   "last_runtime": {
    "build_target": "//learning/brain/python/client:colab_notebook",
    "kind": "private"
   },
   "name": "Attention Model Codelab.ipynb",
   "provenance": [
    {
     "file_id": "1TEez0zxlE23RyPtPVEUaL6zhim-r8gMj",
     "timestamp": 1518199421351
    },
    {
     "file_id": "0By5BN4UDRuWSSHJuR2t2YVIzZjQ",
     "timestamp": 1509645017645
    }
   ],
   "version": "0.3.2",
   "views": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: attention-tutorial/README.md
================================================
# Attention Based Classification Tutorial

**Recommended time: 30 minutes**

**Contributors: nthain, martin-gorner**


This tutorial provides an introduction to building text classification models in Tensorflow that use attention to provide insight into how classification decisions are being made. We will build our Tensorflow graph following the Embed - Encode - Attend - Predict paradigm introduced by Matthew Honnibal. For more information about this approach, you can refer to:

Slides: https://goo.gl/BYT7au

Video: https://youtu.be/pzOzmxCR37I

Figure 1 below provides a representation of the full Tensorflow graph we will build in this tutorial.

![Figure 1](img/entire_model.png "Figure 1")

This tutorial was created in collaboration with the Tensorflow without a PhD series. To check out more episodes, tutorials, and codelabs from this series, please visit: 

https://github.com/GoogleCloudPlatform/tensorflow-without-a-phd


## To Run Locally

1.  Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for
    the project (recommended, but technically optional).

    Python 3:

    ```
    python3 -m venv env
    ```

    To enter your virtual env:

    ```shell
    source env/bin/activate
    ```

2.  Install library dependencies:

    ```shell
    pip install -r requirements.txt
    ```
    

================================================
FILE: attention-tutorial/checkpoints/README.md
================================================
This directory stores model checkpoints during training.


================================================
FILE: attention-tutorial/data/README.md
================================================
A directory to hold our toxicity data.

================================================
FILE: attention-tutorial/process_figshare.py
================================================
"""Cleans and splits the toxicity data from Figshare:

https://figshare.com/articles/Wikipedia_Talk_Labels_Toxicity/4563973

------------------------------------------------------------------------

Copyright 2018, Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import os
import re
from urllib.request import urlretrieve

DEFAULT_DATA_DIR = 'data/'
FIGSHARE_PATH = 'https://ndownloader.figshare.com/files/'
FIGSHARE_URL_MAPPING = {
    'toxicity_annotations.tsv': FIGSHARE_PATH + '7394539',
    'toxicity_annotated_comments.tsv': FIGSHARE_PATH + '7394542'
}


def download_figshare(download_data_dir=DEFAULT_DATA_DIR):
  """Fetches any Figshare toxicity tsv that is not already on disk.

  The target directory is created when it does not exist. Each of the two
  tsv files is downloaded only if no file with that name is present in the
  directory; when neither needed downloading, a notice is printed instead.

  Args:
    download_data_dir (string): if provided, the directory where the
      Figshare tsvs should be stored
  """
  if not os.path.exists(download_data_dir):
    os.makedirs(download_data_dir)

  tsv_names = ('toxicity_annotations.tsv', 'toxicity_annotated_comments.tsv')
  fetched_any = False
  for tsv_name in tsv_names:
    target_path = os.path.join(download_data_dir, tsv_name)
    if os.path.isfile(target_path):
      # Already downloaded on a previous run.
      continue
    fetched_any = True
    print('Downloading %s...' % tsv_name, end='')
    urlretrieve(FIGSHARE_URL_MAPPING[tsv_name], target_path)
    print('Done!')

  if not fetched_any:
    print('Figshare data already exists.')


def process_figshare(input_data_dir=DEFAULT_DATA_DIR,
                     output_data_dir=DEFAULT_DATA_DIR):
  """Cleans and splits the toxicity data from Figshare.

  Averages the `toxicity` annotations per `rev_id`, joins the averages with
  the annotated comments, replaces the NEWLINE_TOKEN / TAB_TOKEN placeholders
  in each comment with a space, marks a comment `is_toxic` when its mean
  toxicity is above 0.5, and writes one `wiki_<split>.csv` file for each of
  the 'train', 'test' and 'dev' values of the `split` column.

  Nothing is recomputed when all three output files already exist.

  Args:
    input_data_dir (string): if provided, the directory where the
      Figshare tsvs are stored
    output_data_dir (string): if provided, the directory where the
      output splits should be written. It is created if it does not exist.
  """
  split_names = ['train', 'test', 'dev']
  output_paths = {
      name: os.path.join(output_data_dir, 'wiki_%s.csv' % name)
      for name in split_names
  }
  if all(os.path.isfile(path) for path in output_paths.values()):
    print('Processed files already exist.')
    return

  print('Processing files...', end='')
  # rev_id is read as a string in both files so the merge keys compare equal.
  toxicity_annotated_comments = pd.read_csv(
      os.path.join(input_data_dir, 'toxicity_annotated_comments.tsv'),
      sep='\t',
      dtype={'rev_id': 'str'})
  toxicity_annotations = pd.read_csv(
      os.path.join(input_data_dir, 'toxicity_annotations.tsv'),
      sep='\t',
      dtype={'rev_id': 'str'})

  # One row per comment holding the mean of its annotators' toxicity values.
  annotations_gped = toxicity_annotations.groupby(
      'rev_id', as_index=False).agg({'toxicity': 'mean'})
  all_data = pd.merge(
      annotations_gped, toxicity_annotated_comments, on='rev_id')

  all_data['comment'] = all_data['comment'].apply(lambda x: re.sub(
      'NEWLINE_TOKEN|TAB_TOKEN', ' ', x))

  all_data['is_toxic'] = all_data['toxicity'] > 0.5

  # to_csv does not create missing directories, so make sure the output
  # directory exists when it differs from the (already populated) input one.
  # An empty string means the current directory and needs no creation.
  if output_data_dir and not os.path.exists(output_data_dir):
    os.makedirs(output_data_dir)

  # split into train, test, dev
  for split in split_names:
    # `@split` refers to the loop variable above.
    all_data.query('split == @split').to_csv(output_paths[split], index=False)
  print('Done!')


# TODO(nthain): Add input and output dirs as flags.
# Running this file as a script only processes tsvs that are already present
# in the default data directory; it does not call download_figshare() first.
if __name__ == '__main__':
  process_figshare()


================================================
FILE: attention-tutorial/requirements.txt
================================================
absl-py==0.1.9
appnope==0.1.0
bleach==3.3.0
certifi==2024.7.4
chardet==3.0.4
comet-ml==1.0.8
decorator==4.2.1
entrypoints==0.2.3
enum34==1.1.6
futures==3.1.1
h5py==2.7.1
html5lib==0.999999999
idna==3.7
ipykernel==4.8.2
ipython==8.10.0
ipython-genutils==0.2.0
ipywidgets==7.1.2
jedi==0.11.1
Jinja2==3.1.4
jsonschema==2.6.0
jupyter==1.0.0
jupyter-client==5.2.3
jupyter-console==5.2.0
jupyter-core==4.11.2
kaggle==1.0.5
Keras==2.13.1
Markdown==2.6.11
MarkupSafe==1.0
mistune==2.0.3
nbconvert==6.5.1
nbformat==4.4.0
nltk==3.9
notebook==6.4.12
numpy==1.22.0
pandas==0.22.0
pandocfilters==1.4.2
parso==0.1.1
pexpect==4.4.0
pickleshare==0.7.4
Pillow==10.3.0
prompt-toolkit==1.0.15
protobuf==3.18.3
ptyprocess==0.5.2
Pygments==2.15.0
python-dateutil==2.6.1
pytz==2017.3
PyYAML==5.4
pyzmq==17.0.0
qtconsole==4.3.1
requests==2.32.2
scikit-learn==0.19.1
scipy==1.10.0
Send2Trash==1.5.0
simplegeneric==0.8.1
six==1.11.0
sklearn==0.0
tensorflow==2.12.1
tensorflow-tensorboard==1.5.0
terminado==0.8.1
testpath==0.3.1
tflearn==0.3.2
tornado==6.4.1
traitlets==4.3.2
urllib3==1.26.18
wcwidth==0.1.7
webencodings==0.5.1
websocket-client==0.47.0
Werkzeug==3.0.6
widgetsnbextension==3.1.4
wurlitzer==1.0.1


================================================
FILE: attention-tutorial/visualize_attention.py
================================================
"""A class to help visualize attention weights.

------------------------------------------------------------------------

Copyright 2018, Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import tensorflow as tf
import numpy as np

# Raise pandas' column display limit. The fully qualified option name is used
# because the bare 'max_columns' pattern is ambiguous (and rejected) on pandas
# versions that define more than one option containing it.
pd.set_option('display.max_columns', 100)
# NOTE(review): tf.contrib is not available in TensorFlow 2.x, while
# requirements.txt pins tensorflow==2.12.1 -- confirm the targeted version.
tokenizer = tf.contrib.learn.preprocessing.tokenizer
# Default key under which comments are fed to the classifier.
WORDS_FEATURE = 'words'
# Token sequences are padded or truncated to this many entries for display.
MAX_DOCUMENT_LENGTH = 60


class wordVal(object):
  """Pairs a word with a numeric value; only the word is shown by str()."""

  def __init__(self, word, val):
    """Stores the word and the value associated with it.

    Args:
      * word: the text returned by str() on this object
      * val: the value carried alongside the word
    """
    self.word, self.val = word, val

  def __str__(self):
    # The value is deliberately left out of the printed form.
    return self.word


class attentionDisplay(object):
  """Visualizes the attention weights a classifier assigns to a string."""

  def __init__(self, vocab_processor, classifier, words_feature='words'):
    """
        Args:
          * vocab_processor: a trained vocabulary processor from
            tf.contrib.learn.preprocessing.VocabularyProcessor
          * classifier: the classifier of class Estimator produced in
            Attention_Model_Codelab.ipynb
          * words_feature (string): if provided, the key for the comments in the
            feed dictionary expected by the classifier
    """
    self.words_feature = words_feature
    self.classifier = classifier
    self.vocab_processor = vocab_processor

  def _rgb_to_hex(self, rgb):
    """Formats a 3-tuple of ints as a '#rrggbb' string."""
    return '#%02x%02x%02x' % rgb

  def _color_wordvals(self, s):
    """Returns a CSS background rule that gets redder as s.val grows."""
    # Red stays at 255; green and blue fade from 255 (val 0) to 0 (val 1).
    fade = 255 - int(s.val * 255)
    return 'background-color: %s' % self._rgb_to_hex((255, fade, fade))

  def _predict_sentence(self, input_string):
    """Runs the classifier on one string.

    Returns:
      A pair of lists (classes, attentions) holding the 'class' and
      'attention' entries of each prediction the classifier yields.
    """
    encoded = np.array(list(self.vocab_processor.transform([input_string])))

    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={self.words_feature: encoded}, num_epochs=1, shuffle=False)

    classes, attentions = [], []
    for prediction in self.classifier.predict(input_fn=predict_input_fn):
      classes.append(prediction['class'])
      attentions.append(prediction['attention'])
    return classes, attentions

  def _resize_and_tokenize(self, input_string):
    """Tokenizes the string to exactly MAX_DOCUMENT_LENGTH entries.

    Shorter token lists are padded with empty strings; longer ones are cut.
    """
    tokens = list(tokenizer([input_string]))[0]
    padding = [''] * (MAX_DOCUMENT_LENGTH - len(tokens))
    return (tokens + padding)[:MAX_DOCUMENT_LENGTH]

  def display_prediction_attention(self, input_string):
    """Prints the predicted label and returns the attention-colored tokens.

    Returns:
      A pandas Styler over a one-row frame of tokens, each cell shaded
      according to the attention weight paired with that token.
    """
    pred, attn = self._predict_sentence(input_string)
    print('Toxic' if pred[0] else 'Not toxic')
    tokens = self._resize_and_tokenize(input_string)
    wordvals = [wordVal(token, weight) for token, weight in zip(tokens, attn[0])]
    token_row = pd.DataFrame(wordvals).transpose()
    return token_row.style.applymap(self._color_wordvals)


================================================
FILE: data_preparation/README.md
================================================
# Dataset preparation

This directory contains some steps to prepare our data before training our ML models. In particular, we want to:
 * Shuffle the data and split it into train, eval and test datasets.
 * Create an artificial bias (female vs male) for our embedding experiments. This is done by modifying the toxicity rate for examples labeled as 'male'.


## Environment Setup

### Python Dependencies

Install library dependencies (it is optional, but recommended to install these
in a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html)):

    ```shell
    # The python2 way to create and use virtual environment
    # (optional, but recommended):
    virtualenv .pyenv
    source .pyenv/bin/activate
    # Install dependencies
    pip install -r requirements.txt

    jupyter notebook

    # ... do stuff ...

    # Exit your virtual environment.
    deactivate
    ```


### Execution flow


#### Splits the data locally

We recommend using a small dataset 'train_small.tfrecord'.

  ```shell
  NOW=$(date +%Y%m%d%H%M%S)
  JOB_NAME=data-preparation-$NOW

  python run_preprocessing_data_split.py \
    --job_dir 'local_data' \
    --input_data_path 'local_data/train_small.tfrecord' \
    --output_folder 'local_data/train_eval_test/'
  ```

#### Splits the data on the cloud

  ```shell
  NOW=$(date +%Y%m%d%H%M%S)
  JOB_NAME=data-preparation-$NOW

  python run_preprocessing_data_split.py \
    --job_name $JOB_NAME \
    --job_dir gs://kaggle-model-experiments/dataflow/$JOB_NAME \
    --input_data_path 'gs://kaggle-model-experiments/resources/civil_comments_data/train.tfrecord' \
    --output_folder 'gs://kaggle-model-experiments/resources/civil_comments_data/train_eval_test' \
    --cloud
  ```

#### Creates the artificial_bias locally

```shell
  NOW=$(date +%Y%m%d%H%M%S)
  JOB_NAME=data-preparation-$NOW

  python run_preprocessing_artificial_bias.py \
    --job_dir 'local_data' \
    --input_data_path 'local_data/train_eval_test/train*.tfrecord' \
    --output_folder 'local_data/artificial_bias'
  ```

#### Creates the artificial_bias on the cloud

```shell
  NOW=$(date +%Y%m%d%H%M%S)
  JOB_NAME=data-preparation-$NOW
  python run_preprocessing_artificial_bias.py \
    --job_name $JOB_NAME \
    --job_dir gs://kaggle-model-experiments/dataflow/$JOB_NAME \
    --input_data_path 'gs://kaggle-model-experiments/resources/civil_comments_data/train_eval_test/train*.tfrecord' \
    --output_folder gs://kaggle-model-experiments/resources/civil_comments_data/artificial_bias/${USER}/${NOW} \
    --cloud
  ```


================================================
FILE: data_preparation/config.ini
================================================
[CLOUD]
project = wikidetox
runner = DataflowRunner
max_num_workers = 50
defaultWorkerLogLevel = INFO
log_level = ERROR
zone = us-east1-b

[LOCAL]
project = wikidetox
runner = DirectRunner
defaultWorkerLogLevel=INFO
log_level = ERROR


================================================
FILE: data_preparation/preprocessing/__init__.py
================================================


================================================
FILE: data_preparation/preprocessing/constants.py
================================================
"""Constants variables for preprocessing."""

# File-name prefixes of the TFRecord outputs; the preprocessing module joins
# each prefix to its output folder to form the output path.
TRAIN_DATA_PREFIX = 'train'
EVAL_DATA_PREFIX = 'eval'
TEST_DATA_PREFIX = 'test'
# Prefix of the oversampled training set written by the artificial-bias job.
TRAIN_ARTIFICIAL_BIAS_PREFIX = 'train_artificial_bias'


================================================
FILE: data_preparation/preprocessing/preprocessing.py
================================================
"""Preprocessing steps of the data preparation."""

import os
import random

import apache_beam as beam
import tensorflow as tf
from tensorflow_transform import coders

import constants
import tfrecord_utils


def get_identity_list():
  """Returns a new list with the identity field names, in a fixed order."""
  gender = ['male', 'female', 'transgender', 'other_gender']
  sexual_orientation = [
      'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual',
      'other_sexual_orientation'
  ]
  religion = [
      'christian', 'jewish', 'muslim', 'hindu', 'buddhist', 'atheist',
      'other_religion'
  ]
  race_or_ethnicity = [
      'black', 'white', 'asian', 'latino', 'other_race_or_ethnicity'
  ]
  disability = [
      'physical_disability', 'intellectual_or_learning_disability',
      'psychiatric_or_mental_illness', 'other_disability'
  ]
  return (gender + sexual_orientation + religion + race_or_ethnicity +
          disability)


def get_civil_comments_spec(include_identity_terms=True):
  """Builds the feature spec of the civil_comments dataset.

  Args:
    include_identity_terms: when True, every name from get_identity_list() is
      added as a float feature whose default value is -1.0.

  Returns:
    A dict mapping feature names to scalar `tf.FixedLenFeature` objects.
  """
  string_fields = ('comment_text', 'id')
  float_fields = ('toxicity', 'severe_toxicity', 'obscene', 'sexual_explicit',
                  'identity_attack', 'insult', 'threat')
  int_fields = ('toxicity_annotator_count', 'identity_annotator_count')

  spec = {}
  for field_names, dtype in ((string_fields, tf.string),
                             (float_fields, tf.float32),
                             (int_fields, tf.int64)):
    for field_name in field_names:
      spec[field_name] = tf.FixedLenFeature([], dtype=dtype)

  if not include_identity_terms:
    return spec

  for identity in get_identity_list():
    # The default lets records without this identity field still be parsed.
    spec[identity] = tf.FixedLenFeature([],
                                        dtype=tf.float32,
                                        default_value=-1.0)
  return spec


def split_data(examples, train_fraction, eval_fraction):
  """Randomly partitions the examples into three PCollections.

  Each element draws a uniform random number: below `train_fraction` it goes
  to partition 0, below `train_fraction + eval_fraction` to partition 1, and
  otherwise to partition 2.

  Args:
    examples: PCollection of examples to split.
    train_fraction: Probability of assigning an element to partition 0.
    eval_fraction: Probability of assigning an element to partition 1.

  Returns:
    The result of `beam.Partition` with three partitions.
  """
  eval_upper_bound = train_fraction + eval_fraction

  def partition_fn(data, n_partition):
    # Neither argument is used: the assignment is purely random.
    draw = random.random()
    if draw < train_fraction:
      return 0
    return 1 if draw < eval_upper_bound else 2

  return examples | 'SplitData' >> beam.Partition(partition_fn, 3)


@beam.ptransform_fn
def Shuffle(examples):  # pylint: disable=invalid-name
  """Shuffles a PCollection by grouping its elements under random keys.

  Every element is paired with a random float key, the pairs are grouped by
  key, and the keys are dropped again so only the original elements remain.

  Args:
    examples: PCollection to shuffle.

  Returns:
    A PCollection holding the same elements as `examples`.
  """
  return (examples
          | 'PairWithRandom' >> beam.Map(lambda x: (random.random(), x))
          | 'GroupByRandom' >> beam.GroupByKey()
          # Index the (key, values) pair instead of unpacking it in the lambda
          # signature: tuple parameters are a SyntaxError on Python 3.
          | 'DropRandom' >> beam.FlatMap(
              lambda key_and_values: key_and_values[1]))


def write_to_tf_records(examples, output_path):
  """Shuffles the examples, serializes them and writes '.tfrecord' files.

  The base name of `output_path` is appended to every step label so the
  function can be applied several times within one pipeline.

  Args:
    examples: PCollection of decoded examples.
    output_path: File path prefix of the TFRecord files to write.
  """
  label_suffix = os.path.basename(output_path)

  shuffled = examples | ('Shuffle_' + label_suffix) >> Shuffle()

  encoder = tfrecord_utils.EncodeTFRecord(
      feature_spec=get_civil_comments_spec(),
      optional_field_names=get_identity_list())
  serialized = shuffled | ('Serialize_' + label_suffix) >> beam.ParDo(encoder)

  sink = beam.io.WriteToTFRecord(
      file_path_prefix=output_path, file_name_suffix='.tfrecord')
  _ = serialized | ('WriteToTF_' + label_suffix) >> sink


class OversampleExample(beam.DoFn):
  """Oversamples examples from a given class.

  Elements for which `rule_fn` returns a truthy value are emitted
  `oversample_rate` times; all other elements are emitted once.
  """

  def __init__(self, rule_fn, oversample_rate):
    """Initialises the oversampler.

    Args:
      rule_fn: Function taking an element and returning whether it belongs to
        the class to oversample.
      oversample_rate: Positive integer, the number of copies emitted for each
        matching element.

    Raises:
      ValueError: If `oversample_rate` is not a positive integer.
    """
    # The type is checked first so that a non-numeric rate raises the
    # documented ValueError rather than failing inside the `<=` comparison.
    if not isinstance(oversample_rate, int) or oversample_rate <= 0:
      raise ValueError('oversample_rate should be a positive integer.')
    self._rule_fn = rule_fn
    self._oversample_rate = oversample_rate

  def process(self, element):
    """Yields `element` once, or `oversample_rate` times if the rule matches."""
    copies = self._oversample_rate if self._rule_fn(element) else 1
    for _ in range(copies):
      yield element


def _select_male_toxic_example(example,
                               threshold_identity=0.5,
                               threshold_toxic=0.5):
  """Tells whether an example is both toxic and labeled as male.

  Args:
    example: Mapping with a 'toxicity' entry and, optionally, a 'male' entry.
    threshold_identity: Minimum 'male' value for the example to count as male.
    threshold_toxic: Minimum 'toxicity' value for the example to count as
      toxic.

  Returns:
    True when both thresholds are reached; an example without a 'male' entry
    never matches.
  """
  is_toxic = example['toxicity'] >= threshold_toxic
  is_male = 'male' in example and example['male'] >= threshold_identity
  return is_toxic and is_male


def run_data_split(p, input_data_path, train_fraction, eval_fraction,
                   output_folder):
  """Reads TF Records, splits them randomly and writes train/eval/test sets.

  Args:
    p: Beam pipeline for constructing PCollections and applying PTransforms.
    input_data_path: Input TF Records.
    train_fraction: Fraction of the data to be allocated to the training set.
    eval_fraction: Fraction of the data to be allocated to the eval set.
    output_folder: Folder to save the train/eval/test datasets.

  Raises:
    ValueError:
        If train_fraction + eval_fraction >= 1.
        If output_folder already exists. This exception prevents the user
            from overwriting a previous split.
  """
  if train_fraction + eval_fraction >= 1.:
    raise ValueError('Train and eval fraction are incompatible.')
  if tf.gfile.Exists(output_folder):
    raise ValueError('Output directory should be empty.'
                     ' You should select a different path.')

  decoder = tfrecord_utils.DecodeTFRecord(
      feature_spec=get_civil_comments_spec(),
      optional_field_names=get_identity_list())
  examples = (
      p
      | 'ReadExamples' >>
      beam.io.tfrecordio.ReadFromTFRecord(file_pattern=input_data_path)
      | 'DecodeTFRecord' >> beam.ParDo(decoder))

  partitions = split_data(examples, train_fraction, eval_fraction)

  # Partition indices follow split_data: 0 is train, 1 is eval, 2 is test.
  prefixes = (constants.TRAIN_DATA_PREFIX, constants.EVAL_DATA_PREFIX,
              constants.TEST_DATA_PREFIX)
  for index, prefix in enumerate(prefixes):
    write_to_tf_records(partitions[index], os.path.join(output_folder, prefix))


def run_artificial_bias(p, train_input_data_path, output_folder,
                        oversample_rate):
  """Writes a copy of the training data with toxic 'male' examples oversampled.

  Args:
    p: Beam pipeline for constructing PCollections and applying PTransforms.
    train_input_data_path: Input TF Records, which is typically the training
      dataset. This artificial bias method should not be run on eval/test.
    output_folder: Folder in which the artificially biased training set is
      saved.
    oversample_rate: How many times to oversample the targeted class.
  """
  decoder = tfrecord_utils.DecodeTFRecord(
      feature_spec=get_civil_comments_spec(),
      optional_field_names=get_identity_list())
  oversampler = OversampleExample(_select_male_toxic_example, oversample_rate)
  output_path = os.path.join(output_folder,
                             constants.TRAIN_ARTIFICIAL_BIAS_PREFIX)

  biased_train_data = (
      p
      | 'ReadExamples' >>
      beam.io.tfrecordio.ReadFromTFRecord(file_pattern=train_input_data_path)
      | 'DecodeTFRecord' >> beam.ParDo(decoder)
      | 'CreateBias' >> beam.ParDo(oversampler))

  write_to_tf_records(biased_train_data, output_path)


================================================
FILE: data_preparation/preprocessing/tfrecord_utils.py
================================================
"""Utilities to decode and encode TF Records.

These utilities are wrappers around TF-Transform coders that handle the
    specifics of optional fields.
"""

import apache_beam as beam
from tensorflow_transform import coders


class Schema(object):
  """Minimal dataset schema that simply hands back a feature spec.

  It stands in for dataset_schema from tensorflow_transform.tf_metadata,
      which lacks support for `FixedLenFeature` default values: an exception
      is triggered by _feature_from_feature_spec.
  TODO(fprost): Submit internal bug here.
  """

  def __init__(self, spec):
    """Stores the feature spec (a dict from feature names to features)."""
    self._spec = spec

  def as_feature_spec(self):
    """Returns the very spec object given at construction time."""
    return self._spec


class DecodeTFRecord(beam.DoFn):
  """Decodes TF-Record examples and strips defaulted optional fields.

  Decoding relies on the coder utility
    'tensorflow_transform.coders.ExampleProtoCoder'. For optional fields
    (indicated by the 'default_value' argument of `FixedLenFeature`), the
    coder generates the default value when the field is missing.
  This wrapper post-processes the decoded element and removes every optional
      field whose value indicates that the default was used.
  """

  def __init__(self,
               feature_spec,
               optional_field_names,
               rule_optional_fn=lambda x: x < 0):
    """Initialises a TF-Record decoder.

    Args:
      feature_spec: Dictionary from feature names to one of `FixedLenFeature`,
        `SparseFeature` or `VarLenFeature`. It contains all the features to
        parse (including optional ones).
      optional_field_names: list of optional fields.
      rule_optional_fn: function that takes the value of an optional field and
        returns True if the value is indicative of a default value (e.g.
        resulting from the default value of parsing FixedLenFeature). Current
        code requires that all optional_field_names share the
        rule_optional_fn.
    """
    self._rule_optional_fn = rule_optional_fn
    self._optional_field_names = optional_field_names
    self._schema = Schema(feature_spec)
    self._coder = coders.ExampleProtoCoder(self._schema)

  def process(self, element):
    """Yields the decoded element without its defaulted optional fields."""
    decoded = self._coder.decode(element)
    for field_name in self._optional_field_names:
      if self._rule_optional_fn(decoded[field_name]):
        decoded.pop(field_name)
    yield decoded


class EncodeTFRecord(beam.DoFn):
  """Wrapper around ExampleProtoCoder for encoding optional fields.

  An element is encoded with a feature spec from which the optional fields it
  does not contain have been removed.
  """

  def __init__(self, feature_spec, optional_field_names):
    """Initialises a TF-Record encoder.

    Args:
      feature_spec: Dictionary from feature names to one of `FixedLenFeature`,
        `SparseFeature` or `VarLenFeature`. It contains all the features to
        encode (including optional ones).
      optional_field_names: list of optional fields.
    """
    self._feature_spec = feature_spec
    self._optional_field_names = optional_field_names
    # Coders keyed by the tuple of optional fields missing from an element, so
    # a coder is built once per combination instead of once per element.
    self._coder_cache = {}

  def _coder_for(self, missing_fields):
    """Returns the (cached) coder for elements lacking `missing_fields`."""
    coder = self._coder_cache.get(missing_fields)
    if coder is None:
      element_spec = self._feature_spec.copy()
      for field_name in missing_fields:
        del element_spec[field_name]
      coder = coders.ExampleProtoCoder(Schema(element_spec))
      self._coder_cache[missing_fields] = coder
    return coder

  def process(self, element):
    """Yields the serialized form of `element`."""
    missing_fields = tuple(
        field_name for field_name in self._optional_field_names
        if field_name not in element)
    yield self._coder_for(missing_fields).encode(element)


================================================
FILE: data_preparation/requirements.txt
================================================
apache-beam[gcp]==2.2.0
configparser==3.5.0
tensorflow==2.12.1
tensorflow_transform==0.9


================================================
FILE: data_preparation/run_preprocessing_artificial_bias.py
================================================
"""Sets up and start the Dataflow job for data preparation."""

import argparse
import logging
import os
import sys

import apache_beam as beam
import configparser
from preprocessing import preprocessing


def _parse_arguments(argv):
  """Parses the command line of the artificial-bias job.

  Args:
    argv: Full argument vector; the first entry (program name) is skipped.

  Returns:
    The argparse namespace holding the parsed flags.
  """
  parser = argparse.ArgumentParser(
      description='Runs Preprocessing on Civil comments data.')
  # Flags are registered in this order, which is also the --help order.
  flag_specs = [
      ('--cloud',
       dict(action='store_true', help='Run preprocessing on the cloud.')),
      ('--job_name', dict(required=False, help='Dataflow job name')),
      ('--job_dir',
       dict(
           required=True,
           help='Directory in which to stage code and write temporary outputs'
       )),
      ('--output_folder',
       dict(
           required=True,
           help='Directory where to write train, eval and test data')),
      ('--input_data_path', dict()),
      ('--oversample_rate',
       dict(
           required=False,
           default=5,
           type=int,
           help='How many times to oversample the targeted class')),
  ]
  for flag, flag_options in flag_specs:
    parser.add_argument(flag, **flag_options)
  return parser.parse_args(args=argv[1:])


def _set_logging(log_level):
  """Sets the root logger's level from a level name such as 'error'."""
  # Level names are upper-case attributes of the logging module.
  level = getattr(logging, log_level.upper())
  logging.getLogger().setLevel(level)


def _parse_config(env, config_file_path):
  """Reads one section of an ini-style configuration file.

  Args:
    env: The environment in which the preprocessing job will be run; it is
      used as the name of the section to read.
    config_file_path: Path to the configuration file to be parsed.

  Returns:
    A dictionary containing the options of the `env` section.
  """
  parser = configparser.ConfigParser()
  parser.read(config_file_path)
  section_items = parser.items(env)
  return dict(section_items)


def main():
  """Builds the pipeline options and runs the artificial-bias job.

  The 'CLOUD' or 'LOCAL' section of 'config.ini' (resolved against the
  current working directory) is selected by the --cloud flag.

  Raises:
    ValueError: If --cloud is given without --job_name.
  """
  args = _parse_arguments(sys.argv)
  env = 'CLOUD' if args.cloud else 'LOCAL'
  config = _parse_config(env, 'config.ini')

  options = {'project': str(config.get('project'))}
  if args.cloud:
    if not args.job_name:
      raise ValueError('Job name must be specified for cloud runs.')
    # Cloud-only settings; setup.py is located next to this script.
    options['job_name'] = args.job_name
    options['max_num_workers'] = int(config.get('max_num_workers'))
    options['setup_file'] = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'setup.py'))
    options['staging_location'] = os.path.join(args.job_dir, 'staging')
    options['temp_location'] = os.path.join(args.job_dir, 'tmp')
    options['zone'] = config.get('zone')

  pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
  _set_logging(config.get('log_level'))
  runner = str(config.get('runner'))
  with beam.Pipeline(runner, options=pipeline_options) as pipeline:
    preprocessing.run_artificial_bias(
        pipeline,
        train_input_data_path=args.input_data_path,
        output_folder=args.output_folder,
        oversample_rate=args.oversample_rate)


if __name__ == '__main__':
  main()


================================================
FILE: data_preparation/run_preprocessing_data_split.py
================================================
"""Sets up and start the Dataflow job for data preparation."""

import argparse
import logging
import os
import sys

import apache_beam as beam
import configparser
from preprocessing import preprocessing


def _parse_arguments(argv):
  """Parses the command line of the data-split job.

  Args:
    argv: Full argument vector; the first entry (program name) is skipped.

  Returns:
    The argparse namespace holding the parsed flags.
  """
  parser = argparse.ArgumentParser(
      description='Runs Preprocessing on Civil comments data.')
  # Flags are registered in this order, which is also the --help order.
  flag_specs = [
      ('--cloud',
       dict(action='store_true', help='Run preprocessing on the cloud.')),
      ('--job_name', dict(required=False, help='Dataflow job name')),
      ('--job_dir',
       dict(
           required=True,
           help='Directory in which to stage code and write temporary outputs'
       )),
      ('--output_folder',
       dict(
           required=True,
           help='Directory where to write train, eval and test data')),
      ('--input_data_path', dict()),
      ('--train_fraction',
       dict(
           required=False,
           default=0.7,
           type=float,
           help='The fraction of the data to allocate to the training dataset'
       )),
      ('--eval_fraction',
       dict(
           required=False,
           default=0.15,
           type=float,
           help='The fraction of the data to allocate to the eval dataset')),
  ]
  for flag, flag_options in flag_specs:
    parser.add_argument(flag, **flag_options)
  return parser.parse_args(args=argv[1:])


def _set_logging(log_level):
  """Sets the root logger's level from a level name such as 'error'."""
  # Level names are upper-case attributes of the logging module.
  level = getattr(logging, log_level.upper())
  logging.getLogger().setLevel(level)


def _parse_config(env, config_file_path):
  """Reads one section of an ini-style configuration file.

  Args:
    env: The environment in which the preprocessing job will be run; it is
      used as the name of the section to read.
    config_file_path: Path to the configuration file to be parsed.

  Returns:
    A dictionary containing the options of the `env` section.
  """
  parser = configparser.ConfigParser()
  parser.read(config_file_path)
  section_items = parser.items(env)
  return dict(section_items)


def main():
  """Builds the pipeline options and runs the data-split job.

  The 'CLOUD' or 'LOCAL' section of 'config.ini' (resolved against the
  current working directory) is selected by the --cloud flag.

  Raises:
    ValueError: If --cloud is given without --job_name.
  """
  args = _parse_arguments(sys.argv)
  env = 'CLOUD' if args.cloud else 'LOCAL'
  config = _parse_config(env, 'config.ini')

  options = {'project': str(config.get('project'))}
  if args.cloud:
    if not args.job_name:
      raise ValueError('Job name must be specified for cloud runs.')
    # Cloud-only settings; setup.py is located next to this script.
    options['job_name'] = args.job_name
    options['max_num_workers'] = int(config.get('max_num_workers'))
    options['setup_file'] = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'setup.py'))
    options['staging_location'] = os.path.join(args.job_dir, 'staging')
    options['temp_location'] = os.path.join(args.job_dir, 'tmp')
    options['zone'] = config.get('zone')

  pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
  _set_logging(config.get('log_level'))
  runner = str(config.get('runner'))
  with beam.Pipeline(runner, options=pipeline_options) as pipeline:
    preprocessing.run_data_split(
        pipeline,
        input_data_path=args.input_data_path,
        train_fraction=args.train_fraction,
        eval_fraction=args.eval_fraction,
        output_folder=args.output_folder)


if __name__ == '__main__':
  main()


================================================
FILE: data_preparation/setup.py
================================================
from setuptools import setup, find_packages

# Distribution name and version of the preprocessing package.
NAME = 'jigsaw'
VERSION = '1.0'
# Packages installed together with this distribution.
REQUIRED_PACKAGES = ['tensorflow-transform==0.9.0']

# find_packages() collects every Python package located next to this file.
setup(
    name=NAME,
    version=VERSION,
    packages=find_packages(),
    install_requires=REQUIRED_PACKAGES,
)


================================================
FILE: experiments/.gitignore
================================================
# Ignore local data, e.g. copies of embeddings
local_data

# Ignore local tmp files and directories
tmp

# Local config that holds cloud/comet.ml settings.
tf_trainer/convai_config.py


================================================
FILE: experiments/README.md
================================================
# Text Classification Framework

This directory contains an ML framework for text classification. We illustrate
it with toxic (and other attributes) comment classification.

The framework is structured as a series of common files and templates to quickly
construct models on top of the [Keras](https://keras.io/) or the [TensorFlow
Estimator API](https://www.tensorflow.org/programmers_guide/estimators).

The templates also demonstrate how these models can be trained using [Google ML
Engine](https://cloud.google.com/ml-engine/).


## Environment Setup

### Build Tools/Bazel Dependencies

Install [Bazel](https://docs.bazel.build/versions/master/install-os-x.html);
this is the build tool we use to run tests, etc.

### Python Dependencies

Install library dependencies (it is optional, but recommended, to install these
in a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html)):

```shell
# The python3 way to create and use virtual environment
# (optional, but recommended):
python3 -m venv .pyenv
source .pyenv/bin/activate
# Install dependencies
pip install -r requirements.txt

# ... do stuff ...

# Exit your virtual environment.
deactivate
```

### Cloud and ML Engine configuration

1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/).
2. Log in:
```shell
gcloud auth login
```
You will be prompted to visit a page in the browser; follow the login instructions there.

Due to [some issues](https://stackoverflow.com/questions/44401088/using-training-tfrecords-that-are-stored-on-google-cloud), also run this command:

```shell
gcloud auth application-default login
```
Follow the instructions there as well.

3. Set the project:
```shell
gcloud config set project [PROJECT]
```

4. Verify that the above setup works:
```shell
gcloud ml-engine models list
```

You should see some existing models. Example output:
```shell
NAME                                DEFAULT_VERSION_NAME
kaggle_model                        v_20180627_173451
...
```

## Training an Existing Model

To train an existing model, execute either command:
 * `./tf_trainer/MODEL_NAME/run.local.sh` to run training locally, or
 * `./tf_trainer/MODEL_NAME/run.ml_engine.sh` to run training on [Google ML
Engine](https://cloud.google.com/ml-engine/).

These scripts assume that you have access to the resources on our cloud
projects. If you don't, you can still run the models locally, but will have to
modify the data paths in `run.local.sh`. At the moment, we only support reading
data in `tf.record` format. See
[`tools/convert_csv_to_tfrecord.py`](https://github.com/conversationai/conversationai-models/blob/master/experiments/tools/convert_csv_to_tfrecord.py)
for a simple CSV to `tf.record` converter.


## Running a hyper parameter tuning job

To run a hyper parameter tuning job on CMLE, execute the following command:
 * `./tf_trainer/MODEL_NAME/run.hyperparameter.sh`.

The hyperparameter configuration (MODEL_NAME/hparam_config.yaml) describes the job configuration, the parameters to tune and their respective range.

You can monitor your progress in the CMLE UI.


## Deploying a trained model on CMLE

At the end of your training, the model will be saved as a .pb file. Note: this is currently broken for keras models. TODO(fprost): Update this.

You can then deploy this model on CMLE by executing the following command:
 * `./tf_trainer/MODEL_NAME/run.deploy.sh`.

The model will be accessible as an API and available for [batch/online predictions](https://cloud.google.com/ml-engine/docs/tensorflow/batch-predict).
Further information can be found [here](https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models) about deploying models on CMLE.

## Deploying several models on CMLE for a given training run

The argument `n_export` allows you to save several models during your training run (one model every `train_steps / n_export` steps).
All of the .pb files will be saved in a subfolder of your MODEL_DIR.

There is a convenient utility in model_evaluation to help you to deploy all models on CMLE:
 * `python utils_export/deploy_continous_model.py --parent_dir MODEL_DIR --model_name MODEL_NAME `


## Evaluate an Existing Model on New Data

See `model_evaluation/` for further information.


### Type Checking

Check the typings:

```shell
mypy --ignore-missing-imports -p tf_trainer
```

It's recommended you use mypy as an additional linter in your editor.

### Testing

Run all the tests and see the output streamed:

```shell
bazel test --test_output=streamed ...
```

You can also run tests individually, directly with python like so:

```shell
python -m tf_trainer.common.tfrecord_input_test
python -m tf_trainer.common.base_keras_model_test
```

### Building a New Model

TODO(jjtan)


================================================
FILE: experiments/WORKSPACE
================================================
# Bazel Workspace File.


================================================
FILE: experiments/__init__.py
================================================


================================================
FILE: experiments/requirements.txt
================================================
absl-py==0.7.0
astor==0.7.1
bert-tensorflow==1.0.1
bleach==3.3.0
certifi==2024.7.4
chardet==3.0.4
gast==0.2.2
gcsfs==0.2.3
grpcio==1.53.2
h5py==2.9.0
html5lib==1.0.1
idna==3.7
jsonlines==1.2.0
Markdown==3.0.1
mypy==0.670
nltk==3.9
numpy==1.22.0
pandas==0.24.1
protobuf==3.18.3
PyYAML==5.4
requests==2.32.2
scipy==1.10.0
sentencepiece==0.1.8
six==1.12.0
tensorboard==1.12.2
tensorflow==2.12.1
tensorflow-hub==0.2.0
termcolor==1.1.0
tf-sentencepiece==0.1.8
typed-ast==1.3.2
urllib3==1.26.19
websocket-client==0.54.0
Werkzeug==3.0.3
wurlitzer==1.0.2


================================================
FILE: experiments/setup.py
================================================
from setuptools import find_packages
from setuptools import setup

# Packages installed together with the tf_trainer distribution.
# NOTE(review): tensorflow-hub is pinned to 0.1.1 here but to 0.2.0 in
# requirements.txt - confirm which version is intended.
REQUIRED_PACKAGES = [
    'nltk>=3.3',
    'typed_ast==1.3.2',
    'tensorflow-hub==0.1.1',
    'bert-tensorflow==1.0.1'
]

# find_packages() collects every Python package located next to this file.
setup(
    name='tf_trainer',
    version='0.1',
    install_requires=REQUIRED_PACKAGES,
    packages=find_packages(),
    include_package_data=True,
    description='TF Estimator modelling framework.')


================================================
FILE: experiments/testdata/BUILD
================================================
# Make the test fixtures addressable from other packages, e.g. as
# "//testdata:cats_and_dogs_onehot.vocab.txt" in a `data` attribute.
exports_files([
  "cats_and_dogs_onehot.vocab.txt",
  "cats_and_dogs_with_cat_opt_int_labels.jsonl",
  "cats_and_dogs_with_partial_cat_int_labels.jsonl",
  "cats_and_dogs.jsonl",
])


================================================
FILE: experiments/testdata/cats_and_dogs.jsonl
================================================
{ "text": "cats good", "bad": 0.0 }
{ "text": "cats bad", "bad": 1.0 }
{ "text": "dogs good", "bad": 0.0 }
{ "text": "dogs bad", "bad": 1.0 }
{ "text": "good cats", "bad": 0.0 }
{ "text": "dogs and cats", "bad": 0.0 }
{ "text": "not bad dogs and cats", "bad": 0.0 }
{ "text": "not bad dogs", "bad": 0.0 }
{ "text": "bad dogs and cats", "bad": 1.0 }
{ "text": "bad dogs and bad cats", "bad": 1.0 }
{ "text": "dogs and bad cats", "bad": 1.0 }
{ "text": "dogs and not bad cats", "bad": 0.0 }
{ "text": "dogs and cats bad", "bad": 1.0 }
{ "text": "dogs and cats good", "bad": 1.0 }
{ "text": "not dogs and bad cats", "bad": 1.0 }
{ "text": "not dogs and not cats", "bad": 0.0 }


================================================
FILE: experiments/testdata/cats_and_dogs_onehot.vocab.txt
================================================
dogs 1.0 0.0 0.0 0.0 0.0 0.0
cats 0.0 1.0 0.0 0.0 0.0 0.0
good 0.0 0.0 1.0 0.0 0.0 0.0
bad 0.0 0.0 0.0 1.0 0.0 0.0
and 0.0 0.0 0.0 0.0 1.0 0.0
not 0.0 0.0 0.0 0.0 0.0 1.0


================================================
FILE: experiments/testdata/cats_and_dogs_with_cat_opt_int_labels.jsonl
================================================
{ "text": "cats good", "bad": 0.0, "cat": 1 }
{ "text": "cats bad", "bad": 1.0, "cat": 1 }
{ "text": "dogs good", "bad": 0.0 }
{ "text": "dogs bad", "bad": 1.0 }
{ "text": "good cats", "bad": 0.0, "cat": 1 }
{ "text": "dogs and cats", "bad": 0.0, "cat": 1 }
{ "text": "not bad dogs and cats", "bad": 0.0, "cat": 1 }
{ "text": "not bad dogs", "bad": 0.0 }
{ "text": "bad dogs and cats", "bad": 1.0, "cat": 1 }
{ "text": "bad dogs and bad cats", "bad": 1.0, "cat": 1 }
{ "text": "dogs and bad cats", "bad": 1.0, "cat": 1 }
{ "text": "dogs and not bad cats", "bad": 0.0, "cat": 1 }
{ "text": "dogs and cats bad", "bad": 1.0, "cat": 1  }
{ "text": "dogs and cats good", "bad": 1.0, "cat": 1  }
{ "text": "not dogs and bad cats", "bad": 1.0, "cat": 1  }
{ "text": "not dogs and not cats", "bad": 0.0, "cat": 1 }


================================================
FILE: experiments/testdata/cats_and_dogs_with_partial_cat_int_labels.jsonl
================================================
{ "text": "cats good", "bad": 0.0, "cat": 1 }
{ "text": "cats bad", "bad": 1.0, "cat": 1 }
{ "text": "dogs good", "bad": 0.0, "cat": 0  }
{ "text": "dogs bad", "bad": 1.0, "cat": 0  }
{ "text": "good cats", "bad": 0.0, "cat": 1 }
{ "text": "dogs and cats", "bad": 0.0, "cat": 1 }
{ "text": "not bad dogs and cats", "bad": 0.0, "cat": 1 }
{ "text": "not bad dogs", "bad": 0.0, "cat": 0 }
{ "text": "bad dogs and cats", "bad": 1.0, "cat": 1 }
{ "text": "bad dogs and bad cats", "bad": 1.0, "cat": 1 }
{ "text": "dogs and bad cats", "bad": 1.0, "cat": 1 }
{ "text": "dogs and not bad cats", "bad": 0.0}
{ "text": "dogs and cats bad", "bad": 1.0 }
{ "text": "dogs and cats good", "bad": 1.0 }
{ "text": "not dogs and bad cats", "bad": 1.0 }
{ "text": "not dogs and not cats", "bad": 0.0 }


================================================
FILE: experiments/tf_trainer/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/common/BUILD
================================================
py_library(
    name = "types",
    srcs = [
        "types.py",
    ],
)

py_library(
    name = "model_trainer",
    srcs = [
        "model_trainer.py",
    ],
    deps = [
        ":base_model",
        ":data_input",
        ":text_preprocessor",
        ":types",
    ],
)

py_library(
    name = "token_embedding_index",
    srcs = [
        "token_embedding_index.py",
    ],
    deps = [
        ":base_model",
        ":types",
    ],
)

py_test(
    name = "token_embedding_index_test",
    srcs = ["token_embedding_index_test.py"],
    data = ["//testdata:cats_and_dogs_onehot.vocab.txt"],
    deps = [
        ":token_embedding_index",
        ":types",
    ],
)

py_library(
    name = "text_preprocessor",
    srcs = [
        "text_preprocessor.py",
    ],
    deps = [
        ":base_model",
        ":token_embedding_index",
        ":types",
    ],
)

py_test(
    name = "text_preprocessor_test",
    srcs = ["text_preprocessor_test.py"],
    data = [
        "//testdata:cats_and_dogs_onehot.vocab.txt",
    ],
    deps = [
        ":text_preprocessor",
        ":types",
    ],
)

py_library(
    name = "base_model",
    srcs = [
        "base_model.py",
    ],
    deps = [":types"],
)

py_library(
    name = "data_input",
    srcs = [
        "dataset_input.py",
        "tfrecord_input.py",
    ],
    # ":base_model" is a py_library target rather than a source file, so it is
    # declared as a dependency instead of being listed in `srcs`.
    deps = [
        ":base_model",
        ":types",
    ],
)

py_test(
    name = "tfrecord_input_test",
    srcs = ["tfrecord_input_test.py"],
    deps = [
        ":data_input",
        ":types",
    ],
)

py_library(
    name = "cnn_spec_parser",
    srcs = ["cnn_spec_parser.py"],
    deps = [":types"],
)

py_test(
    name = "cnn_spec_parser_test",
    srcs = ["cnn_spec_parser_test.py"],
    deps = [
        ":cnn_spec_parser",
        ":types",
    ],
)

py_library(
    name = "episodic_tfrecord_input",
    srcs = ["episodic_tfrecord_input.py"],
    deps = [
        ":types",
        ":base_model",
        ":data_input",
    ],
)


================================================
FILE: experiments/tf_trainer/common/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/common/base_model.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Interface for Models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import tensorflow as tf

from tf_trainer.common import types
from typing import Callable

# The TF Example key associated with input features that consist of an
# UTF-8 string, for models that use that as input.
TEXT_FEATURE_KEY = 'text'

# The TF Example key associated with a Tensor of int32s for models that
# use tokens from a vocabulary as input.
TOKENS_FEATURE_KEY = 'tokens'

# The TF Example key associated with examples in inference that consist of
# an int64 integer. It is a unique identifier of the TF Example and is passed
# along by the estimator and returned in the predictions (forward_features).
EXAMPLE_KEY = 'comment_key'


class BaseModel(abc.ABC):
  """Common interface that every model class is expected to implement.

  Subclasses must provide `estimator`. `hparams` may be overridden; by default
  it returns an empty set of hyperparameters.
  """

  def map(self, f: Callable[[tf.estimator.Estimator], tf.estimator.Estimator]
         ) -> 'BaseModel':
    """Returns a new model whose estimator is transformed by `f`.

    This is the extension point for wrapping a model, e.g. to add
    preprocessing steps around its estimator.

    Args:
      f: Function applied to the estimator built by this model.

    Returns:
      A `BaseModel` that delegates to this model and applies `f` to the
      estimator it builds.
    """
    wrapped = self

    class Model(BaseModel):

      def estimator(_, model_dir):
        # Build the wrapped model's estimator, then let `f` transform it.
        return f(wrapped.estimator(model_dir))

      def hparams(_):
        return wrapped.hparams()

    return Model()

  @abc.abstractmethod
  def estimator(self, model_dir: str) -> tf.estimator.Estimator:
    """Builds the estimator for this model, given its model directory."""

  def hparams(self) -> tf.contrib.training.HParams:
    """Returns the model's hyperparameters (empty unless overridden)."""
    empty_hparams = tf.contrib.training.HParams()
    return empty_hparams


================================================
FILE: experiments/tf_trainer/common/basic_gpu_config.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU

================================================
FILE: experiments/tf_trainer/common/cnn_spec_parser.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""CNN Specification Parser.

A simple parser for specifications of convolutional layers.

BNF defining the syntax to specify CNNs:
```
  layers = layer : layers
  layer = filters
  filters = filter, filters
  filter = (size / stride -> num_filters)
  size, stride, num_filters = [0-9]+
```

Inspiration for the notation comes from: `num_filters` being the output
embedding size, and the other dimension of the computed CNN matrix will be
`input_size * size / stride`.
"""

import re
from typing import List

# Layer (':') and filter (',') separators may be surrounded by whitespace.
layers_split_regexp = re.compile(r'\s*:\s*')
filters_split_regexp = re.compile(r'\s*,\s*')
# A single filter, '(size / stride -> num_filters)'; inner whitespace is
# optional.
filter_regexp = re.compile(r'\(\s*(?P<size>\d+)\s*/\s*(?P<stride>\d+)\s*'
                           r'\-\>\s*(?P<num_filters>\d+)\s*\)')


class FilterParseError(Exception):
  """Raised when a string is not a valid filter specification."""


class Filter(object):
  """A single CNN filter.

  filter = '(size / stride -> num_filters)'

  Attributes:
    size: The filter size parsed from the specification.
    stride: The stride parsed from the specification.
    num_filters: The number of filters parsed from the specification.
  """

  def __init__(self, str: str) -> None:
    """Parses a filter specification.

    Args:
      str: Text of the form '(size / stride -> num_filters)'. Leading and
        trailing whitespace is ignored.

    Raises:
      FilterParseError: If the text is not exactly one filter specification.
    """
    # The whole (stripped) text must be one filter. A prefix-only match would
    # silently ignore trailing text, e.g. a second filter whose separating
    # comma was forgotten.
    m = filter_regexp.fullmatch(str.strip())
    if m is None:
      raise FilterParseError('Bad filter definition for: %s' % str)
    self.num_filters = int(m.group('num_filters'))  # type: int
    self.size = int(m.group('size'))  # type: int
    self.stride = int(m.group('stride'))  # type: int

  def __str__(self) -> str:
    """Returns the canonical '(size / stride -> num_filters)' form."""
    return ('(%d / %d -> %d)' % (self.size, self.stride, self.num_filters))


class ConcurrentFilters(object):
  """The filters that are applied side by side within one CNN layer.

  filters = filter, filters
  """

  def __init__(self, str: str) -> None:
    """Parses a comma-separated list of filter specifications."""
    self.filters = [Filter(spec) for spec in filters_split_regexp.split(str)]

  def __str__(self) -> str:
    """Returns the filters joined by ', ' in their canonical form."""
    rendered = ['%s' % (single_filter,) for single_filter in self.filters]
    return ', '.join(rendered)


class SequentialLayers(object):
  """An ordered stack of CNN layers.

  layers = filters : layers
  """

  def __init__(self, str: str) -> None:
    """Parses a colon-separated list of layer specifications."""
    self.layers = [
        ConcurrentFilters(layer_spec)
        for layer_spec in layers_split_regexp.split(str)
    ]  # type: List[ConcurrentFilters]

  def __str__(self) -> str:
    """Returns the layers joined by ' : ' in their canonical form."""
    rendered = ['%s' % (layer,) for layer in self.layers]
    return ' : '.join(rendered)


================================================
FILE: experiments/tf_trainer/common/cnn_spec_parser_test.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tfrecord_input."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tf_trainer.common.cnn_spec_parser import SequentialLayers
from tf_trainer.common.cnn_spec_parser import ConcurrentFilters
from tf_trainer.common.cnn_spec_parser import Filter


class CnnSpecParserTest(tf.test.TestCase):
  """Checks parsing and re-serialization of a CNN layer specification."""

  def test_SequentialLayers(self):
    """Parses a three-layer spec, inspects layer 0 and round-trips it."""
    spec_text = ('(2 / 2 -> 100), (3 / 2 -> 101) '
                 ': (6 / 2 -> 102) '
                 ': (3 / 1 -> 103)')
    parsed = SequentialLayers(spec_text)

    first_layer = parsed.layers[0]
    self.assertEqual(len(first_layer.filters), 2)

    first_filter = first_layer.filters[0]  # type: Filter
    self.assertEqual(first_filter.size, 2)
    self.assertEqual(first_filter.stride, 2)
    self.assertEqual(first_filter.num_filters, 100)

    # Serializing the parsed spec must reproduce the input text exactly.
    self.assertEqual(str(parsed), spec_text)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: experiments/tf_trainer/common/dataset_config.sh
================================================
#!/bin/bash

# Shared GCS locations used by every dataset configuration below.
BASE_PATH="gs://conversationai-models"
GCS_RESOURCES="${BASE_PATH}/resources"
MODEL_PARENT_DIR="${BASE_PATH}/tf_trainer_runs"

# $1 selects the dataset. Every branch sets train_path, valid_path, labels,
# label_dtypes and text_feature.
if [ "$1" == "civil_comments" ]; then
    train_path="${GCS_RESOURCES}/civil_comments_data/train_eval_test/train-*.tfrecord"
    valid_path="${GCS_RESOURCES}/civil_comments_data/train_eval_test/eval-*.tfrecord"
    labels="toxicity"
    label_dtypes="float"
    text_feature="comment_text"

elif [ "$1" == "toxicity" ]; then
    train_path="${GCS_RESOURCES}/toxicity_data/toxicity_q42017_train.tfrecord"
    valid_path="${GCS_RESOURCES}/toxicity_data/toxicity_q42017_validate.tfrecord"
    labels="frac_neg"
    label_dtypes="float"
    text_feature="comment_text"

elif [ "$1" == "many_communities" ]; then
    train_path="${GCS_RESOURCES}/transfer_learning_data/many_communities/20181105_train.tfrecord"
    valid_path="${GCS_RESOURCES}/transfer_learning_data/many_communities/20181105_validate.tfrecord"
    labels="removed"
    # removed is a boolean variable cast as an int.
    # 1 means that the comment was removed and 0 means it was not.
    label_dtypes="int"
    text_feature="comment_text"

elif [ "$1" == "many_communities_40_per_8_shot" ]; then

    # $2 selects which training set to use for this dataset.
    # NOTE(review): several paths below contain a double dot ("..tfrecord");
    # presumably this matches the object names in GCS - confirm.
    if [ "$2" == "optimistic" ]; then
        train_path="${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/augmented_train.tfrecord"
    elif [ "$2" == "pessimistic" ]; then
        train_path="${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/original_train..tfrecord"
    else
        echo "Second positional arg must be one of optimistic, pessimistic." >&2
        exit 1
    fi

    valid_path="${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord"
    # test_path = "${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord"
    labels="label"
    # NOTE(review): presumably `label` is, like `removed` above, a boolean cast
    # as an int (1 = comment removed, 0 = not removed) - confirm.
    label_dtypes="int"
    text_feature="text"

    # used for param tuning
    train_steps=3000
    eval_steps=250
    eval_period=200

else
    echo "First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot." >&2
    exit 1
fi


================================================
FILE: experiments/tf_trainer/common/dataset_input.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Abstract Base Class for DatasetInput."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
from tf_trainer.common import types


class DatasetInput(abc.ABC):
  """Interface for objects that supply input functions to an Estimator.

  Implementations provide what the TF docs call `input_fn`, for use with the
  train, evaluate, and predict methods of a Tensorflow Estimator.
  """

  @abc.abstractmethod
  def train_input_fn(self) -> types.EstimatorInput:
    """Returns the Estimator input used for training."""

  @abc.abstractmethod
  def validate_input_fn(self) -> types.EstimatorInput:
    """Returns the Estimator input used for validation."""


================================================
FILE: experiments/tf_trainer/common/episodic_tfrecord_input.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DatasetInput implementation for episodic data."""

import tensorflow as tf
from pathlib import Path

import collections
import os
import random

from tf_trainer.common import dataset_input
from tf_trainer.common import types
from typing import List, Dict, Tuple, Union

# Command-line flags for episodic input. The help texts for `dev_path` and
# `episode_size` used to repeat the `train_path` description.
tf.app.flags.DEFINE_string('train_path', None,
                           'Path to the training data TFRecord file.')
tf.app.flags.DEFINE_string('dev_path', None,
                           'Path to the dev data TFRecord file.')
# NOTE(review): episode_size is declared as a string flag and is not read
# anywhere in this module - confirm its intended type and meaning.
tf.app.flags.DEFINE_string('episode_size', None,
                           'Size of an episode.')

# A piece of text, held either as a tensor or as a plain Python string.
Text = Union[tf.Tensor, str]
# A label, held either as a tensor or as a plain Python float.
Label = Union[tf.Tensor, float]

# One example together with the domain it belongs to.
TextDomainLabel = collections.namedtuple('TextDomainLabel',
                                         ['text', 'domain', 'label'])
# All examples of one episode, stored as three parallel lists.
EpisodeData = collections.namedtuple('EpisodeData',
                                     ['texts', 'domains', 'labels'])


class EpisodicTFRecordInput(dataset_input.DatasetInput):
  """Serves episodes, one per domain, read from a directory of TFRecords."""

  def __init__(self, train_dir, validate_dir) -> None:
    """Stores the directories that hold the training and validation files."""
    self.train_dir = train_dir
    self.validate_dir = validate_dir

  def train_input_fn(self) -> types.FeatureAndLabelTensors:
    """Returns the next (texts, domains, labels) element of the episodes.

    The one-shot iterator over the episodes is kept on
    `self.episode_batches_itr`.
    """
    episodes = self._get_randomized_episodes(self.train_dir)
    columns = (
        [episode.texts for episode in episodes],
        [episode.domains for episode in episodes],
        [episode.labels for episode in episodes],
    )
    dataset = tf.data.Dataset.from_tensor_slices(columns)
    self.episode_batches_itr = dataset.make_one_shot_iterator()
    return self.episode_batches_itr.get_next()

  def validate_input_fn(self) -> types.FeatureAndLabelTensors:
    """Not implemented; currently returns None."""

  def _get_randomized_episodes(self, directory: str) -> List[EpisodeData]:
    """Loads one episode per TFRecord file and shuffles the episodes.

    Every file matching "*.tfrecord" in `directory` is expected to hold the
    data of a single domain and to be named "[domain].tfrecord".

    Args:
      directory: Directory that contains the per-domain TFRecord files.

    Returns:
      A list with one `EpisodeData` per file, in random order.
    """
    pattern = os.path.join(directory, '*.tfrecord')
    episodes = []
    for index, path in enumerate(tf.gfile.Glob(pattern)):
      tf.logging.info('PROCESSING FILE {}: {}'.format(index, path))
      episodes.append(self._dataset_from_tfrecord_file(path))

    tf.logging.info('Shuffling episodes')
    random.shuffle(episodes)  # Shuffles the list in place.

    return episodes

  def _dataset_from_tfrecord_file(self, tfrecord_file: str) -> EpisodeData:
    """Reads every record of one file, in random order, into an episode."""
    # The file stem doubles as the domain name.
    domain = Path(tfrecord_file).stem

    records = list(tf.python_io.tf_record_iterator(tfrecord_file))
    random.shuffle(records)

    texts, domains, labels = [], [], []
    for record in records:
      parsed = tf.parse_single_example(
          record, {
              'text': tf.FixedLenFeature([], tf.string),
              'label': tf.FixedLenFeature([], tf.int64)
          })  # type: Dict[str, types.Tensor]
      texts.append(parsed['text'])
      domains.append(domain)
      labels.append(parsed['label'])

    return EpisodeData(texts=texts, domains=domains, labels=labels)


================================================
FILE: experiments/tf_trainer/common/episodic_tfrecord_input_test.py
================================================
"""Tests for episodic_tfrecord_input."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tf_trainer.common import episodic_tfrecord_input


class EpisodicTFRecordInputTest(tf.test.TestCase):
  """Smoke test that pulls two elements from the episodic training input."""

  def test(self):
    """Fetches two batches from a `gs://` directory; asserts nothing."""
    train_dir = 'gs://kaggle-model-experiments/resources/transfer_learning_data/many_communities_pruned_episodes'
    tf.logging.info('CREATE')
    episodic_input = episodic_tfrecord_input.EpisodicTFRecordInput(
        train_dir, 'asdf')
    tf.logging.info('GET DATA')
    next_episode = episodic_input.train_input_fn()
    with tf.Session() as sess:
      tf.logging.info('FIRST BATCH')
      first_batch = sess.run(next_episode)
      tf.logging.info(first_batch)
      tf.logging.info('SECOND BATCH')
      second_batch = sess.run(next_episode)
      print(second_batch)


if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.test.main()


================================================
FILE: experiments/tf_trainer/common/model_trainer.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The Model Trainer class.

This provides an abstraction of Keras and TF.Estimator, and is intended for use
in text classification models (although it may generalize to other kinds of
problems).
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os
import os.path
import six

import tensorflow as tf
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.estimator import estimator as estimator_lib
from tensorflow.python.estimator import model_fn as model_fn_lib
from tensorflow.python.estimator.export.export_output import PredictOutput
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.training import optimizer as optimizer_lib
from tensorflow.python.lib.io import file_io

from tf_trainer.common import base_model
from tf_trainer.common import dataset_input as ds

FLAGS = tf.app.flags.FLAGS

# Command-line flags that configure a training run.
tf.app.flags.DEFINE_string('model_dir', None,
                           "Directory for the Estimator's model directory.")
tf.app.flags.DEFINE_string('warm_start_from', None,
                           'Existing checkpoint from which to start training.')
tf.app.flags.DEFINE_bool('enable_profiling', False,
                         'Enable profiler hook in estimator.')
# Adjacent string literals are concatenated verbatim, so each sentence of the
# help text carries its own trailing space.
tf.app.flags.DEFINE_integer(
    'n_export', -1, 'Number of models to export. '
    'If =-1, only the best checkpoint (wrt specified eval metric) is '
    'exported. '
    'If =1, only the last checkpoint is exported. '
    'If >1, we export `n_export` evenly-spaced checkpoints.')
tf.app.flags.DEFINE_string('key_name', 'comment_key',
                           'Name of a pass-thru integer id for batch scoring.')

tf.app.flags.DEFINE_integer('train_steps', 100000,
                            'The number of steps to train for.')
tf.app.flags.DEFINE_integer('eval_period', 1000,
                            'The number of steps per eval period.')
tf.app.flags.DEFINE_integer('eval_steps', None,
                            'Number of examples to eval for, default all.')

tf.app.flags.mark_flag_as_required('model_dir')


# Copied from:
# https://stackoverflow.com/questions/49846207/tensorflow-estimator-warm-start-from-and-model-dir
class InitHook(tf.train.SessionRunHook):
  """Warm-starts the model from the newest checkpoint found in a directory.

  Args:
    checkpoint_dir: full path to the directory containing the checkpoint
      files (`model.ckpt-<step>.index` and companions).
  """

  def __init__(self, checkpoint_dir):
    self.model_path = checkpoint_dir
    # Ensures the warm start happens at most once, even if `begin` is called
    # several times on the same hook instance.
    self.initialized = False

  def begin(self):
    """Warm-starts from the highest-step checkpoint in `self.model_path`.

    Does nothing if this hook has already warm-started once.

    Raises:
      ValueError: if no `model.ckpt-*.index` file exists in `self.model_path`.
    """
    if self.initialized:
      return

    index_suffix = '.index'
    pattern = os.path.join(self.model_path, 'model.ckpt-*' + index_suffix)
    index_files = file_io.get_matching_files(pattern)
    if not index_files:
      raise ValueError('No checkpoint files found matching %s.' % pattern)

    # Drop only the trailing '.index' to get the checkpoint prefixes, then
    # order them by the numeric step that follows the last '-'.
    prefixes = [x[:-len(index_suffix)] for x in index_files]
    prefixes = sorted(prefixes, key=lambda x: int(x.split('-')[-1]))
    checkpoint = prefixes[-1]

    logging.info('Pre-trained model %s found in %s - warmstarting.',
                 checkpoint, self.model_path)
    tf.train.warm_start(checkpoint)
    self.initialized = True


# This function extends tf.contrib.estimator.forward_features.
# As the binary_head has a ClassificationOutput for serving_default,
# the check at the end of 'new_model_fn' fails in the initial fn.
def forward_features(estimator, keys, sparse_default_values=None):
  """Forward features to predictions dictionary.

  In some cases, user wants to see some of the features in estimators prediction
  output. As an example, consider a batch prediction service: The service simply
  runs inference on the users graph and returns the results. Keys are essential
  because there is no order guarantee on the outputs so they need to be rejoined
  to the inputs via keys or transclusion of the inputs in the outputs.

  Unlike `tf.contrib.estimator.forward_features`, the forwarded features are
  also added to the `predict` export output, and `serving_default` is replaced
  by that same `PredictOutput`.

  Example:
  ```python
    def input_fn():
      features, labels = ...
      features['unique_example_id'] = ...
      features, labels
    estimator = tf.estimator.LinearClassifier(...)
    estimator = tf.contrib.estimator.forward_features(
        estimator, 'unique_example_id')
    estimator.train(...)
    assert 'unique_example_id' in estimator.predict(...)
  ```
  Args:
    estimator: A `tf.estimator.Estimator` object.
    keys: A `string`, a list/tuple of `string`, or `None`. If `None`, every
      feature is forwarded.
    sparse_default_values: A dict of `str` keys mapping the name of the sparse
      features to be converted to dense, to the default value to use. Only
      sparse features indicated in the dictionary are converted to dense and the
      provided default value is used.

  Returns:
      A new `tf.estimator.Estimator` which forwards features to predictions.
  Raises:
    ValueError:
      * if `keys` is already part of `predictions`. We don't allow
        override.
      * if 'keys' does not exist in `features`.
      * if a forwarded feature is not a dense `Tensor` after the optional
        sparse-to-dense conversion.
    TypeError: if `keys` type is not one of `string` or list/tuple of `string`.
  """

  def verify_key_types(keys):  # pylint: disable=missing-docstring
    if keys is None:
      return keys
    if isinstance(keys, six.string_types):
      return [keys]
    if not isinstance(keys, (list, tuple)):
      raise TypeError('keys should be either a string or a list of strings. '
                      'Given: {}'.format(type(keys)))
    for key in keys:
      if not isinstance(key, six.string_types):
        raise TypeError('All items in the given keys list should be a string. '
                        'There exist an item with type: {}'.format(type(key)))
    return keys

  def get_keys(features):
    # `keys is None` means "forward every feature".
    if keys is None:
      return features.keys()
    return keys

  def verify_keys_and_predictions(features, predictions):
    if not isinstance(predictions, dict):
      raise ValueError(
          'Predictions should be a dict to be able to forward features. '
          'Given: {}'.format(type(predictions)))
    for key in get_keys(features):
      if key not in features:
        raise ValueError(
            'keys should be exist in features. Key "{}" is not in features '
            'dict. features dict has following keys: {}. Please check '
            'arguments of forward_features.'.format(key, features.keys()))
      if key in predictions:
        raise ValueError(
            'Cannot forward feature key ({}). Since it does exist in '
            'predictions. Existing prediction keys: {}. Please check arguments '
            'of forward_features.'.format(key, predictions.keys()))

  keys = verify_key_types(keys)

  def new_model_fn(features, labels, mode, config):  # pylint: disable=missing-docstring
    spec = estimator.model_fn(features, labels, mode, config)
    predictions = spec.predictions
    if predictions is None:
      return spec
    verify_keys_and_predictions(features, predictions)
    # Materialize once so that predictions and export outputs receive exactly
    # the same set of forwarded keys.
    forwarded_keys = list(get_keys(features))
    for key in forwarded_keys:
      feature = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(
          features[key])
      if sparse_default_values and (key in sparse_default_values):
        if not isinstance(feature, sparse_tensor_lib.SparseTensor):
          raise ValueError(
              'Feature ({}) is expected to be a `SparseTensor`.'.format(key))
        feature = sparse_ops.sparse_tensor_to_dense(
            feature, default_value=sparse_default_values[key])
      if not isinstance(feature, ops.Tensor):
        raise ValueError(
            'Feature ({}) should be a Tensor. Please use `keys` '
            'argument of forward_features to filter unwanted features, or '
            'add key to argument `sparse_default_values`. '
            'Type of features[{}] is {}.'.format(key, key, type(feature)))
      predictions[key] = feature
    spec = spec._replace(predictions=predictions)
    if spec.export_outputs:  # CHANGES HERE
      # Forward every key (not just the last one visited by the loop above)
      # into the serving signatures.
      outputs = spec.export_outputs['predict'].outputs
      for key in forwarded_keys:
        outputs[key] = spec.predictions[key]
      spec.export_outputs['predict'] = tf.estimator.export.PredictOutput(
          outputs)
      # The original `serving_default` output is replaced so that the default
      # signature also exposes the forwarded features.
      spec.export_outputs[
          'serving_default'] = tf.estimator.export.PredictOutput(outputs)
    return spec

  return estimator_lib.Estimator(
      model_fn=new_model_fn,
      model_dir=estimator.model_dir,
      config=estimator.config)


class ModelTrainer(object):
  """Trains, evaluates and exports the Estimator built by a `BaseModel`.

  The estimator is created once, in the constructor, with its model directory
  set to the value returned by `_model_dir()`.
  """

  def __init__(self, dataset: ds.DatasetInput,
               model: base_model.BaseModel,
               warm_start_from: str = None) -> None:
    """Builds the estimator for `model`.

    Args:
      dataset: Provides `train_input_fn` and `validate_input_fn`.
      model: Model whose `estimator(model_dir)` method builds the Estimator.
      warm_start_from: Optional directory holding `model.ckpt-*` checkpoints
        to warm-start training from. If falsy, no warm start is performed.
    """
    self._dataset = dataset
    self._model = model
    self._warm_start_from = warm_start_from
    self._estimator = model.estimator(self._model_dir())

  def train_with_eval(self):
    """Train with periodic evaluation.

    Trains up to `FLAGS.train_steps` steps with
    `tf.estimator.train_and_evaluate`, saving a checkpoint every
    `FLAGS.eval_period` steps. When several checkpoints may be needed for
    export (`FLAGS.n_export` > 1 or == -1), all checkpoints are kept.
    """
    training_hooks = []
    if FLAGS.enable_profiling:
      training_hooks.append(
          tf.train.ProfilerHook(
              save_steps=10,
              output_dir=os.path.join(self._model_dir(), 'profiler')))
    if self._warm_start_from:
      training_hooks.append(InitHook(checkpoint_dir=self._warm_start_from))

    train_spec = tf.estimator.TrainSpec(
        input_fn=self._dataset.train_input_fn,
        max_steps=FLAGS.train_steps,
        # Pass None rather than an empty list when no hook is configured.
        hooks=training_hooks or None)

    eval_spec = tf.estimator.EvalSpec(
        input_fn=self._dataset.validate_input_fn,
        steps=FLAGS.eval_steps,
        throttle_secs=1)

    self._estimator._config = self._estimator.config.replace(
        save_checkpoints_steps=FLAGS.eval_period)

    # Selecting the best checkpoint or evenly-spaced checkpoints at export
    # time requires that no checkpoint is garbage-collected during training.
    if FLAGS.n_export > 1 or FLAGS.n_export == -1:
      self._estimator._config = self._estimator.config.replace(
          keep_checkpoint_max=None)

    tf.estimator.train_and_evaluate(self._estimator, train_spec, eval_spec)

  def predict_on_dev(self, predict_keys=None):
    """Predicts on the validation set using the highest-step checkpoint.

    Args:
      predict_keys: Forwarded to `Estimator.predict`.

    Returns:
      The result of `Estimator.predict` on the validation input_fn.
    """
    # n_export=1 selects only the last checkpoint, so no metric is needed.
    checkpoints, _ = self._get_list_checkpoint(1, self._model_dir(),
                                               None, None)
    return self._estimator.predict(self._dataset.validate_input_fn,
                                   predict_keys=predict_keys,
                                   checkpoint_path=checkpoints[0])

  def eval_dir(self):
    """Returns the estimator's evaluation directory."""
    return self._estimator.eval_dir()

  def _model_dir(self):
    """Get Model Directory.

    Used to scope logs to a given trial (when hyper param tuning) so that they
    don't run over each other. When running locally it will just use the passed
    in model_dir.
    """
    # The trial id, when present, is read from the `task.trial` entry of the
    # TF_CONFIG environment variable; it defaults to '' otherwise.
    tf_config = json.loads(os.environ.get('TF_CONFIG', '{}'))
    trial = tf_config.get('task', {}).get('trial', '')
    return os.path.join(FLAGS.model_dir, trial)

  def _add_estimator_key(self, estimator, example_key_name):
    """Adds a forward key to the model_fn of an estimator."""
    return forward_features(estimator, example_key_name)

  def _get_best_step_from_event_file(self,
    event_file,
    metrics_key,
    is_first_metric_better_fn):
    """Find, in `event_file`, the step corresponding to the best metric.

    Args:
      event_file: The event file where to find the metrics.
      metrics_key: The metric by which to determine the best checkpoint to save.
      is_first_metric_better_fn: Comparison function to find best metric. Takes
          in as arguments two numbers, returns true if first is better than
          second.

    Returns:
      Best step (int), or None if `metrics_key` is empty or never appears in
      the event file.
    """
    if not metrics_key:
      return None
    best_metric = None
    best_step = None
    for event in tf.train.summary_iterator(event_file):
      for value in event.summary.value:
        if value.tag != metrics_key:
          continue
        metric = value.simple_value
        # Compare against None explicitly: step 0 is a legitimate best step
        # and must not be mistaken for "nothing recorded yet".
        if best_step is None or is_first_metric_better_fn(metric, best_metric):
          best_metric = metric
          best_step = event.step
    return best_step

  def _get_best_checkpoint(self,
    checkpoints,
    metrics_key,
    is_first_metric_better_fn):
    """Find the best checkpoint, according to `metrics_key`.

    If `metrics_key` is not found in the event file, falls back to the
    checkpoint with the lowest 'loss'.

    Args:
      checkpoints: List of model checkpoints.
      metrics_key: The metric by which to determine the best checkpoint to save.
      is_first_metric_better_fn: Comparison function to find best metric. Takes
          in as arguments two numbers, returns true if first is better than
          second.

    Returns:
      Best checkpoint path.

    Raises:
      ValueError: if the eval directory is empty, if neither `metrics_key` nor
        'loss' is found in the event file, or if no checkpoint exists for the
        best step.
    """
    eval_event_dir = self._estimator.eval_dir()

    # Sorted so that "last" is deterministic instead of depending on the
    # order in which the filesystem lists the directory.
    event_files = sorted(file_io.list_directory(eval_event_dir))
    if not event_files:
      raise ValueError('No event files found in directory %s.' % eval_event_dir)
    if len(event_files) > 1:
      print('Multiple event files found in dir %s. Using last one.' % eval_event_dir)

    event_file = os.path.join(eval_event_dir, event_files[-1])

    # Use the best step to find the best checkpoint.
    best_step = self._get_best_step_from_event_file(event_file, metrics_key,
      is_first_metric_better_fn)

    # If we couldn't find metrics_key in the event file, try again using loss.
    if best_step is None:
      print("Metrics key %s not found in metrics, using 'loss' as metric key." %
            metrics_key)
      metrics_key = "loss"
      # Want the checkpoint with the lowest loss
      is_first_metric_better_fn = lambda x, y: x < y

      best_step = self._get_best_step_from_event_file(event_file, metrics_key,
        is_first_metric_better_fn)

    if best_step is None:
      raise ValueError("Couldn't find 'loss' metric in event file %s." % event_file)

    # The step of a checkpoint is the number following the last '-' in its
    # path (e.g. `model.ckpt-1000`).
    best_checkpoint_path = None
    for checkpoint_path in checkpoints:
      version = int(checkpoint_path.split('-')[-1])
      if version == best_step:
        best_checkpoint_path = checkpoint_path

    if not best_checkpoint_path:
      raise ValueError("Couldn't find checkpoint for best_step = %d." % best_step)

    return best_checkpoint_path

  def _get_list_checkpoint(self,
    n_export,
    model_dir,
    metrics_key,
    is_first_metric_better_fn):
    """Get the checkpoints that we want to export, as well as the ones to clean up.

    Args:
      n_export: Number of models to export.
      model_dir: Directory containing the checkpoints.
      metrics_key: The metric by which to determine the best checkpoint to save.
      is_first_metric_better_fn: Comparison function to find best metric. Takes
          in as arguments two numbers, returns true if first is better than
          second.

    Returns:
      Tuple of:
        List of checkpoint paths to export,
        Set of checkpoint paths to delete.
      Both are None when `n_export` is neither 1, -1 nor greater than 1. The
      set is None when every available checkpoint is exported because fewer
      checkpoints exist than requested.

    Raises:
      ValueError: if `model_dir` contains no `model.ckpt-*.index` file.

    If n_export==1, we take only the last checkpoint.
    If n_export==-1, we take the best checkpoint, according to `metrics_key` and
      `is_first_metric_better_fn`. The remaining checkpoints are deleted.
    Otherwise, we consider the list of steps for each for which we have a
    checkpoint. Then we choose n_export number of checkpoints such that their
    steps are as equidistant as possible.
    """
    index_suffix = '.index'
    all_checkpoints = file_io.get_matching_files(
        os.path.join(model_dir, 'model.ckpt-*' + index_suffix))

    if not all_checkpoints:
      raise ValueError('No checkpoint files found matching model.ckpt-*.index.')

    # Strip only the trailing '.index' so that a directory name containing
    # '.index' is left intact, then order checkpoints by step.
    all_checkpoints = [x[:-len(index_suffix)] for x in all_checkpoints]
    all_checkpoints = sorted(all_checkpoints, key=lambda x: int(x.split('-')[-1]))

    # Keep track of the checkpoints to export, and the ones to delete.
    checkpoints_to_export = None
    checkpoints_to_delete = None

    if n_export == 1:
      checkpoints_to_export = [all_checkpoints[-1]]
    elif n_export == -1:
      checkpoints_to_export = [self._get_best_checkpoint(all_checkpoints, metrics_key,
                                                         is_first_metric_better_fn)]
    elif n_export > 1:
      # We want to cover a distance of (len(checkpoints) - 1): for 3 points, we have a distance of 2.
      # with a number of points of (n_export -1): because 1 point is set at the end.
      step = float(len(all_checkpoints) - 1) / (n_export - 1)
      if step <= 1:  # Fewer checkpoints available than the desired number.
        return all_checkpoints, None

      checkpoints_to_export = [
          all_checkpoints[int(i * step)] for i in range(n_export - 1)
      ]
      checkpoints_to_export.append(all_checkpoints[-1])

    if checkpoints_to_export:
      checkpoints_to_delete = set(all_checkpoints) - set(checkpoints_to_export)

    return checkpoints_to_export, checkpoints_to_delete

  def export(self,
    serving_input_fn,
    example_key_name=None,
    metrics_key=None,
    is_first_metric_better_fn=lambda x, y: x > y,
    delete_unexported_checkpoints=True):
    """Export model as a .pb.

    Args:
      serving_input_fn: An input function for inference graph.
      example_key_name: Name of the example_key field (string).
          If None, no example_key will be used.
      metrics_key: The metric by which to determine the best checkpoint to save.
      is_first_metric_better_fn: Comparison function to find best metric. Takes
          in as arguments two numbers, returns true if first is better than
          second. Default function says larger is better. Default value works for
          AUC: higher is better.
      delete_unexported_checkpoints: Boolean flag indicating whether or not to delete
        the checkpoints that aren't exported. If False then all model checkpoints are
        retained.

      NOTE: if using a different metrics_key than AUC, make sure `is_first_metric_better_fn`
        is updated accordingly.

    Raises:
      ValueError: if `FLAGS.n_export` is -1 and `is_first_metric_better_fn` is
        falsy.

    Example keys are useful when doing batch predictions. Typically,
      the predictions are done by a cluster of machines and the order of
      the results is random. Here, we add a forward feature in the inference graph
      (https://www.tensorflow.org/api_docs/python/tf/contrib/estimator/forward_features)
      which will be used as an example unique identifier. In inference, the input
      example includes an example_key field that is passed along by the estimator
      and returned in the predictions.
    """
    if FLAGS.n_export == -1:
      if not is_first_metric_better_fn:
        raise ValueError('Must provide valid `is_first_metric_better_fn` '
          'when exporting best checkpoint.')
      if not metrics_key:
        print('No value provided for `metrics_key`. Using loss.')
        metrics_key = 'loss'
        # Lower loss is better.
        is_first_metric_better_fn = lambda x, y: x < y

    estimator = self._estimator
    if example_key_name:
      estimator = self._add_estimator_key(self._estimator, example_key_name)

    checkpoints_to_export, checkpoints_to_delete = self._get_list_checkpoint(
      FLAGS.n_export, self._model_dir(), metrics_key, is_first_metric_better_fn)

    # Delete the checkpoints we don't want.
    if checkpoints_to_delete and delete_unexported_checkpoints:
      for ckpt in checkpoints_to_delete:
        tf.train.remove_checkpoint(ckpt)

    # Export the desired checkpoints, each under a sub-directory named after
    # its step.
    if checkpoints_to_export:
      for checkpoint_path in checkpoints_to_export:
        version = checkpoint_path.split('-')[-1]
        estimator.export_savedmodel(
          export_dir_base=os.path.join(self._model_dir(), version),
          serving_input_receiver_fn=serving_input_fn,
          checkpoint_path=checkpoint_path)


================================================
FILE: experiments/tf_trainer/common/p100_config.yaml
================================================
# Training job configuration that requests P100 machines for the master and
# the worker.
# NOTE(review): presumably supplied as the job config file when submitting a
# cloud training job -- confirm against the launch scripts.
trainingInput:
  pythonVersion: '3.5'
  # Machine types are listed explicitly below.
  scaleTier: CUSTOM
  masterType: standard_p100
  workerType: standard_p100
  parameterServerType: large_model
  # One worker and one parameter server are requested.
  workerCount: 1
  parameterServerCount: 1

================================================
FILE: experiments/tf_trainer/common/serving_input.py
================================================
"""Serving functions for deployed model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import array_ops

# Handle to the global flag values. The functions in this module receive their
# configuration as arguments and do not read it.
FLAGS = tf.app.flags.FLAGS

def create_text_serving_input_fn(text_feature_name, example_key_name):
  """Builds a serving input fn that parses raw-text `tf.Example` protos.

  Args:
    text_feature_name: Name of the scalar string feature holding the text.
    example_key_name: Name of the scalar int64 key feature. Its parsing
      default is -1.

  Returns:
    A zero-argument function that creates the serving placeholder and returns
    a `tf.estimator.export.ServingInputReceiver`.
  """

  def serving_input_fn_tfrecords():
    # Batch of serialized tf.Example protos received at serving time.
    example_protos = tf.placeholder(
        dtype=tf.string, shape=[None], name="input_example_tensor")

    text_spec = tf.FixedLenFeature([], dtype=tf.string)
    key_spec = tf.FixedLenFeature([], dtype=tf.int64, default_value=-1)
    parsing_spec = {
        text_feature_name: text_spec,
        example_key_name: key_spec,
    }

    parsed_features = tf.parse_example(example_protos, parsing_spec)
    return tf.estimator.export.ServingInputReceiver(parsed_features,
                                                    example_protos)

  return serving_input_fn_tfrecords


def create_serving_input_fn(word_to_idx,
                            unknown_token,
                            text_feature_name,
                            example_key_name):
  """Builds a serving input fn that maps pre-tokenized text to integer ids.

  Args:
    word_to_idx: Dict mapping each vocabulary word to its integer id.
    unknown_token: Value passed as the lookup table's default value.
    text_feature_name: Name of the variable-length string feature holding the
      words of each example.
    example_key_name: Name of the scalar int64 key feature. Its parsing
      default is -1.

  Returns:
    A zero-argument function that creates the serving placeholder and returns
    a `tf.estimator.export.ServingInputReceiver` in which the text feature has
    been replaced by a dense int64 tensor of word ids.
  """

  def serving_input_fn_tfrecords():
    # Batch of serialized tf.Example protos received at serving time.
    example_protos = tf.placeholder(
        dtype=tf.string, shape=[None], name="input_example_tensor")

    parsing_spec = {}
    parsing_spec[text_feature_name] = tf.VarLenFeature(dtype=tf.string)
    parsing_spec[example_key_name] = tf.FixedLenFeature(
        [], dtype=tf.int64, default_value=-1)
    parsed_features = tf.parse_example(example_protos, parsing_spec)

    # Static word -> id table built from the vocabulary dict.
    vocab_words = list(word_to_idx.keys())
    vocab_ids = list(word_to_idx.values())
    table_initializer = tf.contrib.lookup.KeyValueTensorInitializer(
        vocab_words, vocab_ids, key_dtype=tf.string, value_dtype=tf.int64)
    vocab_table = tf.contrib.lookup.HashTable(table_initializer, unknown_token)

    # Look the words up while still sparse, then densify, filling the missing
    # positions with 0.
    sparse_word_ids = vocab_table.lookup(parsed_features[text_feature_name])
    parsed_features[text_feature_name] = tf.sparse_tensor_to_dense(
        sparse_word_ids, default_value=0)

    return tf.estimator.export.ServingInputReceiver(parsed_features,
                                                    example_protos)

  return serving_input_fn_tfrecords


================================================
FILE: experiments/tf_trainer/common/text_preprocessor.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Text Preprocessor."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from absl import flags
import numpy as np
import tensorflow as tf
from tf_trainer.common import base_model
from tf_trainer.common import types
from tf_trainer.common.token_embedding_index import LoadTokenIdxEmbeddings
from typing import Callable, Dict, List, Optional, Tuple

FLAGS = flags.FLAGS

# Read when the embedding variable is created in
# `TextPreprocessor.create_estimator_with_embedding`: it is passed as the
# `trainable` argument of `word_embeddings`.
tf.app.flags.DEFINE_bool('is_embedding_trainable', False,
                         'Enable fine tuning of embeddings.')


class TextPreprocessor(object):
  """Text Preprocessor TensorFlow Estimator Extension.

  Uses embedding indexes to create tensors that map tokens (provided by an
  abstract tokenizer function) to embeddings.

  Note: Due to the lack of text preprocessing functions in tensorflow, we expect
  that the text is already preprocessed (list of words) in inference. In
  training, due to the availability of tf.py_func, we can handle the
  preprocessing.
  """

  def __init__(self, embeddings_path: str) -> None:
    """Loads the vocabulary and the embedding matrix from `embeddings_path`."""
    self._word_to_idx, self._embeddings_matrix, self._unknown_token, self._embedding_size = \
      LoadTokenIdxEmbeddings(embeddings_path)  # type: Tuple[Dict[str, int], np.ndarray, int, int]

  def train_preprocess_fn(self,
                          tokenizer: Callable[[str], List[str]],
                          lowercase: Optional[bool] = True
                         ) -> Callable[[types.Tensor], types.Tensor]:
    """Returns a function converting a text tensor into a tensor of word ids.

    Args:
      tokenizer: Python function splitting a string into a list of words.
      lowercase: Whether to lowercase each word before the vocabulary lookup.

    Returns:
      A function taking a string tensor and returning an int64 tensor of
      vocabulary ids, computed through `tf.py_func`.
    """

    def _tokenize(text: bytes) -> np.ndarray:
      """Converts UTF-8 encoded text into an array of vocabulary ids.

      Uses `tokenizer` and `lowercase` from the enclosing call.

      Args:
        text: UTF-8 encoded text to tokenize (bytes).

      Returns:
        A 1-D int64 numpy array with one id per word; words absent from the
        vocabulary get the unknown-token id.
      """

      words = tokenizer(text.decode('utf-8'))
      if lowercase:
        words = [w.lower() for w in words]
      return np.asarray(
          [self._word_to_idx.get(w, self._unknown_token) for w in words],
          dtype=np.int64)

    def _preprocess_fn(text: types.Tensor) -> types.Tensor:
      """Converts a text into a list of integers.

      Args:
        text: a 0-D string Tensor.

      Returns:
        A 1-D int64 Tensor.
      """
      words = tf.py_func(
          _tokenize, [text], tf.int64, stateful=False, name='PreprocessFn')
      return words

    return _preprocess_fn

  def add_embedding_to_model(self, model: base_model.BaseModel,
                             text_feature_name: str) -> base_model.BaseModel:
    """Returns a new BaseModel with an embedding layer prepended.

    Args:
      model: An existing BaseModel instance.
      text_feature_name: The name of the feature containing text.
    """
    return model.map(
        functools.partial(self.create_estimator_with_embedding,
                          text_feature_name))

  def create_estimator_with_embedding(
      self, text_feature_name: str,
      estimator: tf.estimator.Estimator) -> tf.estimator.Estimator:
    """Takes an existing estimator and prepends the embedding layers to it.

    Args:
      text_feature_name: The name of the feature containing the text.
      estimator: A predefined Estimator that expects embeddings.

    Returns:
      TF Estimator with embedding ops added.

    Note: We need to consider the case of large embeddings (see:
      https://stackoverflow.com/questions/48217599/
      how-to-initialize-embeddings-layer-within-estimator-api/48243086#48243086).
    """
    old_model_fn = estimator.model_fn
    old_config = estimator.config
    old_params = estimator.params

    def add_init_fn_to_estimatorSpec(estimator_spec, init_fn):
      """Add a new init_fn to the scaffold part of estimator spec."""

      def new_init_fn(scaffold, sess):
        init_fn(scaffold, sess)
        previous_init_fn = estimator_spec.scaffold.init_fn
        if previous_init_fn:
          # `Scaffold.init_fn` is the user-provided function already bound to
          # its own scaffold, so it is called with the session only.
          previous_init_fn(sess)

      scaffold = tf.train.Scaffold(
          init_fn=new_init_fn, copy_from_scaffold=estimator_spec.scaffold)
      estimator_spec_with_scaffold = tf.estimator.EstimatorSpec(
          mode=estimator_spec.mode,
          predictions=estimator_spec.predictions,
          loss=estimator_spec.loss,
          train_op=estimator_spec.train_op,
          eval_metric_ops=estimator_spec.eval_metric_ops,
          export_outputs=estimator_spec.export_outputs,
          training_chief_hooks=estimator_spec.training_chief_hooks,
          training_hooks=estimator_spec.training_hooks,
          scaffold=scaffold,
          evaluation_hooks=estimator_spec.evaluation_hooks,
          prediction_hooks=estimator_spec.prediction_hooks)
      return estimator_spec_with_scaffold

    def new_model_fn(features, labels, mode, params, config):
      """model_fn used in defining the new TF Estimator"""

      embeddings, embedding_init_fn = self.word_embeddings(
          trainable=FLAGS.is_embedding_trainable)

      # Only the embedded text feature is handed to the wrapped model_fn.
      text_feature = features[text_feature_name]
      word_embeddings = tf.nn.embedding_lookup(embeddings, text_feature)
      new_features = {text_feature_name: word_embeddings}

      # Fix dimensions to make Keras model output match label dims.
      if mode != tf.estimator.ModeKeys.PREDICT:
        labels = {k: tf.expand_dims(v, -1) for k, v in labels.items()}

      # TODO: Modify when embeddings are part of the model.
      estimator_spec = old_model_fn(
          new_features, labels, mode=mode, config=config)
      estimator_spec_with_scaffold = add_init_fn_to_estimatorSpec(
          estimator_spec, embedding_init_fn)

      return estimator_spec_with_scaffold

    return tf.estimator.Estimator(
        new_model_fn, config=old_config, params=old_params)

  def word_to_idx(self) -> Dict[str, int]:
    """Returns the word -> vocabulary id mapping."""
    return self._word_to_idx

  def unknown_token(self) -> int:
    """Returns the id used for words absent from the vocabulary."""
    return self._unknown_token

  def word_embeddings(self, trainable) -> Tuple[tf.Variable, Callable]:
    """Get word embedding TF Variable.

    Args:
      trainable: Whether the embedding variable is trainable.

    Returns:
      A tuple `(embeddings, init_fn)`: the 'embeddings' variable, shaped like
      the loaded embedding matrix, and an `init_fn(scaffold, sess)` that runs
      the variable's initializer with the loaded matrix fed as its initial
      value.
    """

    embeddings = tf.get_variable(
        'embeddings', self._embeddings_matrix.shape, trainable=trainable)

    def init_fn(scaffold, sess):
      # Feed the matrix at initialization time instead of baking it into the
      # graph as a constant.
      sess.run(embeddings.initializer,
               {embeddings.initial_value: self._embeddings_matrix})

    return embeddings, init_fn


================================================
FILE: experiments/tf_trainer/common/text_preprocessor_test.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for text_preprocessor."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tf_trainer.common import text_preprocessor


class TextPreprocessorTest(tf.test.TestCase):
  """Checks the word ids produced by the training preprocess function."""

  def _token_ids(self, sentence, lowercase):
    """Runs `sentence` through a space-splitting preprocess fn.

    Returns the resulting ids as a Python list.
    """
    preprocessor = text_preprocessor.TextPreprocessor(
        'testdata/cats_and_dogs_onehot.vocab.txt')
    with self.test_session():
      to_ids = preprocessor.train_preprocess_fn(
          tokenizer=lambda x: x.split(' '), lowercase=lowercase)
      return list(to_ids(sentence).eval())

  def test_Tokenize(self):
    ids = self._token_ids('dogs good cats bad rabbits not', lowercase=False)
    self.assertEqual(ids, [1, 3, 2, 4, 7, 6])

  def test_Lowercase(self):
    # Same sentence with mixed casing: lowercasing must yield the same ids.
    ids = self._token_ids('Dogs GOOD Cats BAD rabbits not', lowercase=True)
    self.assertEqual(ids, [1, 3, 2, 4, 7, 6])


# Run the test cases above when this file is executed as a script.
if __name__ == '__main__':
  tf.test.main()


================================================
FILE: experiments/tf_trainer/common/tfrecord_input.py
================================================
"""DatasetInput class based on TFRecord files."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import multiprocessing
import tensorflow as tf
from typing import Callable, List, Dict, Tuple

from tf_trainer.common import base_model
from tf_trainer.common import dataset_input
from tf_trainer.common import types

# Locations of the TFRecord data read by `TFRecordInput`.
tf.app.flags.DEFINE_string('train_path', None,
                           'Path to the training data TFRecord file.')
tf.app.flags.DEFINE_string('validate_path', None,
                           'Path to the validation data TFRecord file.')
# `labels` and `label_dtypes` are parallel comma-separated lists; they are
# paired positionally when the parsing spec is built.
tf.app.flags.DEFINE_string('labels', 'frac_neg',
                           'Comma separated list of label features.')
tf.app.flags.DEFINE_string(
    'label_dtypes', None, 'Comma separated list of dtypes for labels. Each '
    'dtype must be float or int. If not provided '
    'assumes all labels are floats.')
tf.app.flags.DEFINE_string('text_feature', 'comment_text',
                           'Name of feature containing text input.')
tf.app.flags.DEFINE_boolean('round_labels', True,
                            'Round label features to 0 or 1 if true.')
tf.app.flags.DEFINE_integer('batch_size', 256,
                            'Batch sizes to use when reading.')
tf.app.flags.DEFINE_integer(
  'num_prefetch', 5,
  'An optimization parameter for the number of elements to prefetch. See: '
  'https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch')

FLAGS = tf.app.flags.FLAGS

# Maps the dtype names accepted by --label_dtypes to TensorFlow dtypes.
DTYPE_MAPPING = {'float': tf.float32, 'int': tf.int64}

# Per-dtype default label value given to the parsing spec.
DTYPE_DEFAULT = {'float': -1.0, 'int': -1}


class TFRecordInput(dataset_input.DatasetInput):
  """DatasetInput that reads TF Examples from TFRecord files as-is.

  No text preprocessing is applied. The raw string stored under the feature
  named by FLAGS.text_feature is exposed under base_model.TEXT_FEATURE_KEY,
  whatever the original feature key was called.
  """

  def __init__(self) -> None:
    label_names = FLAGS.labels.split(',')
    # Without explicit dtypes every label is treated as a float.
    dtype_names = (
        FLAGS.label_dtypes.split(',')
        if FLAGS.label_dtypes else ['float'] * len(label_names))

    self._labels = label_names
    self._label_dtypes = dtype_names
    self._text_feature = FLAGS.text_feature
    self._round_labels = FLAGS.round_labels
    self._batch_size = FLAGS.batch_size
    self._num_prefetch = FLAGS.num_prefetch

  def labels(self) -> List[str]:
    """Names of the label features, in the order given by FLAGS.labels."""
    return self._labels

  def text_feature(self) -> str:
    """Name of the TF Example feature that holds the input text."""
    return self._text_feature

  def train_input_fn(self) -> tf.data.TFRecordDataset:
    """input_fn for TF Estimators for the training set.

    The returned dataset repeats indefinitely; the model trainer is
    responsible for bounding the number of training steps.
    """
    assert FLAGS.train_path
    training_data = self._input_fn_from_file(FLAGS.train_path)
    return training_data.repeat()

  def validate_input_fn(self) -> tf.data.TFRecordDataset:
    """input_fn for TF Estimators for the validation set (single pass)."""
    assert FLAGS.validate_path
    return self._input_fn_from_file(FLAGS.validate_path)

  def _keys_to_features(self):
    """Builds the parsing spec: the text feature plus one entry per label.

    Each label gets a negative default value so that labels missing from an
    example can be detected later in _process_labels.
    """
    spec = {self._text_feature: tf.FixedLenFeature([], tf.string)}
    for name, dtype_name in zip(self._labels, self._label_dtypes):
      spec[name] = tf.FixedLenFeature(
          [], DTYPE_MAPPING[dtype_name], DTYPE_DEFAULT[dtype_name])
    return spec

  def _input_fn_from_file(self, filepath: str) -> tf.data.TFRecordDataset:
    """Reads, parses, batches and prefetches the examples matching filepath."""
    records = tf.data.TFRecordDataset(
        tf.data.Dataset.list_files(filepath))  # type: tf.data.TFRecordDataset
    examples = records.map(
        self._read_tf_example, num_parallel_calls=multiprocessing.cpu_count())
    batches = examples.batch(self._batch_size)
    return batches.prefetch(self._num_prefetch)

  def _process_labels(self, features, parsed):
    """Derives per-label weights and cleaned-up label values.

    A negative label value marks a label that was missing from the input
    example. For every expected label, a feature named '<label>_weight' is
    added: 1.0 when the label is present and 0.0 when it is missing. The label
    value itself is rounded (when rounding is enabled) and then multiplied by
    its weight, so missing labels end up as zero.

    Args:
        features: the input features read from a TF Example.
        parsed: the parsed TF Example, containing the label values.

    Returns:
        A tuple of a new features dict (input features plus weights) and the
        labels dict.
    """
    # Shallow copy so the caller's dict is left untouched.
    features_with_weights = dict(features)
    label_tensors = {}
    for name in self._labels:
      value = tf.cast(parsed[name], dtype=tf.float32)
      # Present labels are >= 0; missing ones carry the negative default.
      is_present = tf.cast(tf.greater_equal(value, 0.0), dtype=tf.float32)
      if self._round_labels:
        value = tf.round(value)
      features_with_weights[name + '_weight'] = is_present
      label_tensors[name] = tf.multiply(value, is_present)
    return features_with_weights, label_tensors

  def _read_tf_example(
      self,
      record: tf.Tensor,
  ) -> types.FeatureAndLabelTensors:
    """Parses a serialized TF Example into the text feature and its labels.

    The text is read from the feature named by FLAGS.text_feature and stored
    under base_model.TEXT_FEATURE_KEY in the returned features.
    """
    example = tf.parse_single_example(
        record, self._keys_to_features())  # type: Dict[str, types.Tensor]
    text_only = {base_model.TEXT_FEATURE_KEY: example[self._text_feature]}
    return self._process_labels(text_only, example)


class TFRecordInputWithTokenizer(TFRecordInput):
  """TFRecord based DatasetInput that tokenizes the text feature.

  Handles parsing of TF Examples.

  When handling text input, this class rewrites the text input feature using
  the preprocessing fn. That is, the text feature is replaced by a new key in
  the output, changing both the type and contents - from a string to a tensor
  of integers representing tokens of some kind.
  TODO: preserve the original string and write a new key.
  """

  def __init__(self,
               train_preprocess_fn: Callable[[str], List[str]],
               max_seq_len: int = 30000) -> None:
    """Initializes the input from flags plus a tokenizing function.

    Args:
      train_preprocess_fn: function applied to the parsed text feature of each
        example; its result is stored under base_model.TOKENS_FEATURE_KEY.
        Note that it is called on the parsed text tensor inside the dataset
        pipeline, not on a Python string.
      max_seq_len: examples whose token count is not strictly below this
        value are dropped.
    """
    super().__init__()
    self._train_preprocess_fn = train_preprocess_fn
    self._max_seq_len = max_seq_len

  def _input_fn_from_file(self, filepath: str) -> tf.data.TFRecordDataset:
    """Builds a dataset of length-bucketed, padded batches from filepath.

    Args:
      filepath: file pattern of the TFRecord files to read.

    Returns:
      A dataset of (features, labels) batches in which token sequences are
      padded to a common length within each batch.
    """
    filenames_dataset = tf.data.Dataset.list_files(filepath)
    dataset = tf.data.TFRecordDataset(
        filenames_dataset)  # type: tf.data.TFRecordDataset

    parsed_dataset = dataset.map(
        self._read_tf_example, num_parallel_calls=multiprocessing.cpu_count())
    # Drop examples that are too long.
    parsed_dataset = parsed_dataset.filter(lambda x, _: tf.less(
        x['sequence_length'], self._max_seq_len))

    # Only the token sequence has a variable length; everything else is scalar.
    feature_shapes = {
        base_model.TOKENS_FEATURE_KEY: [None],
        'sequence_length': []
    }
    for label in self._labels:
      feature_shapes[label + '_weight'] = []

    padded_shapes = (
      feature_shapes,
      {label: [] for label in self._labels})  # type: Tuple[Dict, Dict]

    bucket_boundaries = [(i + 1) * 20 for i in range(10)]
    # N boundaries define N + 1 buckets, each needing its own batch size.
    bucket_batch_sizes = [self._batch_size] * (len(bucket_boundaries) + 1)
    parsed_dataset = parsed_dataset.apply(
        tf.contrib.data.bucket_by_sequence_length(
            element_length_func=lambda x, _: x['sequence_length'],
            bucket_boundaries=bucket_boundaries,
            bucket_batch_sizes=bucket_batch_sizes,
            padded_shapes=padded_shapes))
    batched_dataset = parsed_dataset.prefetch(self._num_prefetch)
    return batched_dataset

  def _read_tf_example(
      self,
      record: tf.Tensor,
  ) -> types.FeatureAndLabelTensors:
    """Parses TF Example protobuf into token features and labels.

    The text feature is passed through the preprocessing fn; the resulting
    tokens and their count are returned as features in place of the raw text.
    """
    parsed = tf.parse_single_example(
        record, self._keys_to_features())  # type: Dict[str, types.Tensor]

    text = parsed[self.text_feature()]
    tokens = self._train_preprocess_fn(text)
    features = {
        base_model.TOKENS_FEATURE_KEY: tokens,
        'sequence_length': tf.shape(tokens)[0],
    }
    return self._process_labels(features, parsed)


================================================
FILE: experiments/tf_trainer/common/tfrecord_input_test.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tfrecord_input."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from tf_trainer.common import base_model
from tf_trainer.common import tfrecord_input
from tf_trainer.common import types

FLAGS = tf.app.flags.FLAGS


class TFRecordInputTest(tf.test.TestCase):
  """Tests TFRecordInput parsing of a single serialized TF Example."""

  def setUp(self):
    FLAGS.text_feature = 'comment'
    # FLAGS is process-global, so a --label_dtypes value set by one test would
    # otherwise leak into the tests that run after it. Reset it to its
    # declared default so every test starts from the same state.
    FLAGS.label_dtypes = None
    ex = tf.train.Example(
        features=tf.train.Features(
            feature={
                'label':
                    tf.train.Feature(
                        float_list=tf.train.FloatList(value=[0.8])),
                'ignored-label':
                    tf.train.Feature(
                        float_list=tf.train.FloatList(value=[0.125])),
                'int_label':
                    tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
                'comment':
                    tf.train.Feature(
                        bytes_list=tf.train.BytesList(
                            value=['Hi there Bob'.encode('utf-8')]))
            }))
    self.ex_tensor = tf.convert_to_tensor(
        ex.SerializeToString(), dtype=tf.string)

  def test_TFRecordInput_unrounded(self):
    """Labels keep their fractional value when rounding is disabled."""
    FLAGS.round_labels = False
    FLAGS.labels = 'label'
    dataset_input = tfrecord_input.TFRecordInput()

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),
                       b'Hi there Bob')
      np.testing.assert_almost_equal(labels['label'].eval(), 0.8)
      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)
      # Features not requested via FLAGS.labels must not appear in the output.
      self.assertCountEqual(list(labels), ['label'])
      self.assertCountEqual(list(features), ['text', 'label_weight'])

  def test_TFRecordInput_default_values(self):
    """Labels missing from the example get value 0 and weight 0."""
    FLAGS.labels = 'label,fake_label,int_label'
    FLAGS.label_dtypes = 'float,float,int'
    FLAGS.round_labels = False
    dataset_input = tfrecord_input.TFRecordInput()

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),
                       b'Hi there Bob')
      np.testing.assert_almost_equal(labels['label'].eval(), 0.8)
      np.testing.assert_almost_equal(labels['int_label'].eval(), 0.0)
      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)
      np.testing.assert_almost_equal(labels['fake_label'].eval(), 0.0)
      np.testing.assert_almost_equal(features['fake_label_weight'].eval(), 0.0)

  def test_TFRecordInput_rounded(self):
    """Labels are rounded to the nearest integer when rounding is enabled."""
    FLAGS.labels = 'label'
    FLAGS.round_labels = True
    dataset_input = tfrecord_input.TFRecordInput()

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),
                       b'Hi there Bob')
      np.testing.assert_almost_equal(labels['label'].eval(), 1.0)
      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)


class TFRecordInputWithTokenizerTest(tf.test.TestCase):
  """Tests TFRecordInputWithTokenizer on a single serialized TF Example."""

  def setUp(self):
    FLAGS.text_feature = 'comment'
    # FLAGS is process-global, so a --label_dtypes value set by one test would
    # otherwise leak into the tests that run after it. Reset it to its
    # declared default so every test starts from the same state.
    FLAGS.label_dtypes = None
    ex = tf.train.Example(
        features=tf.train.Features(
            feature={
                'label':
                    tf.train.Feature(
                        float_list=tf.train.FloatList(value=[0.8])),
                'int_label':
                    tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
                'comment':
                    tf.train.Feature(
                        bytes_list=tf.train.BytesList(
                            value=['Hi there Bob'.encode('utf-8')]))
            }))
    self.ex_tensor = tf.convert_to_tensor(
        ex.SerializeToString(), dtype=tf.string)

    self.word_to_idx = {'Hi': 12, 'there': 13}
    self.unknown_token = 999

  def preprocessor(self, text):
    """Maps space-separated words to ids, using unknown_token for the rest."""
    return tf.py_func(
        lambda t: np.asarray([
            self.word_to_idx.get(x, self.unknown_token)
            for x in t.decode('utf-8').split(' ')
        ]), [text], tf.int64)

  def test_TFRecordInputWithTokenizer_unrounded(self):
    """Float and int labels, present and missing, with rounding disabled."""
    FLAGS.labels = 'label,fake_label,int_label,fake_int_label'
    FLAGS.label_dtypes = 'float,float,int,int'
    FLAGS.round_labels = False
    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=self.preprocessor)

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(
          list(features[base_model.TOKENS_FEATURE_KEY].eval()), [12, 13, 999])
      self.assertAlmostEqual(labels['label'].eval(), 0.8)
      self.assertAlmostEqual(labels['fake_label'].eval(), 0.0)
      self.assertAlmostEqual(labels['int_label'].eval(), 0.0)
      self.assertAlmostEqual(labels['fake_int_label'].eval(), 0.0)
      self.assertAlmostEqual(features['label_weight'].eval(), 1.0)
      self.assertAlmostEqual(features['fake_label_weight'].eval(), 0.0)
      self.assertAlmostEqual(features['int_label_weight'].eval(), 1.0)
      self.assertAlmostEqual(features['fake_int_label_weight'].eval(), 0.0)

  def test_TFRecordInputWithTokenizer_default_values(self):
    """Labels missing from the example get value 0 and weight 0."""
    FLAGS.labels = 'label,fake_label'
    FLAGS.round_labels = False
    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=self.preprocessor)

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(
          list(features[base_model.TOKENS_FEATURE_KEY].eval()), [12, 13, 999])
      self.assertAlmostEqual(labels['label'].eval(), 0.8)
      self.assertAlmostEqual(labels['fake_label'].eval(), 0.0)
      self.assertAlmostEqual(features['label_weight'].eval(), 1.0)
      self.assertAlmostEqual(features['fake_label_weight'].eval(), 0.0)

  def test_TFRecordInputWithTokenizer_rounded(self):
    """Labels are rounded to the nearest integer when rounding is enabled."""
    FLAGS.labels = 'label'
    FLAGS.round_labels = True
    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=self.preprocessor)

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(
          list(features[base_model.TOKENS_FEATURE_KEY].eval()), [12, 13, 999])
      self.assertEqual(labels['label'].eval(), 1.0)
      self.assertEqual(features['label_weight'].eval(), 1.0)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: experiments/tf_trainer/common/token_embedding_index.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Working with Token Embeding Indexes."""

from typing import Tuple, Dict, Optional, List, Callable
import numpy as np
import functools
import tensorflow as tf

def LoadTokenIdxEmbeddings(embeddings_path: str) \
  -> Tuple[Dict[str, int], np.ndarray, int, int]:
  """Generate word to idx mapping and word embeddings numpy array.

  We have two levels of indirection (e.g. word to idx and then idx to
  embedding) which could reduce embedding size if multiple words map to the
  same idx; although this is not currently a real or useful use-case.

  The returned matrix has one row per word in the file, preceded by a random
  padding row at index 0 and followed by a row for unknown tokens.

  Args:
    embeddings_path: Local, GCS, or HDFS path to embedding file. Each line
      should be a word and its vector representation separated by a space.
      Blank lines are ignored.

  Returns:
    Tuple of:
      A vocabulary dictionary (mapping words to their index)
      A Numpy array of word embeddings with shape (vocab size, embedding size)
      A unique unknown token index (greater than all other token indexes)
      The size of the embeddings for words that is being used

  Raises:
    ValueError: if the file does not exist or contains no embeddings.
  """
  if not tf.gfile.Exists(embeddings_path):
    raise ValueError('File at %s does not exist.' % embeddings_path)

  word_to_idx = {}
  word_embeddings = []

  with tf.gfile.Open(embeddings_path) as f:
    for line in f:
      values = line.split()
      if not values:
        # A blank line (e.g. a trailing newline at the end of the file)
        # carries no embedding; skip it instead of failing on values[0].
        continue
      # Row 0 is reserved for padding, so the first word gets index 1. The
      # index is derived from the rows kept so far so that skipped lines do
      # not leave gaps between word indexes and matrix rows.
      word_to_idx[values[0]] = len(word_embeddings) + 1
      word_embeddings.append(np.asarray(values[1:], dtype='float32'))

  if not word_embeddings:
    raise ValueError('No embeddings loaded from %s.' % embeddings_path)

  embedding_size = len(word_embeddings[0])

  # Add the padding "embedding"
  word_embeddings.insert(0, np.random.randn(embedding_size))

  # Convert embedding to numpy array and append the unknown word embedding,
  # which is the mean of all rows before it (this includes the padding row).
  unknown_token = len(word_embeddings)
  embeddings_matrix = np.asarray(word_embeddings, dtype=np.float32)
  embeddings_matrix = np.append(
      embeddings_matrix, [embeddings_matrix.mean(axis=0)], axis=0)

  return word_to_idx, embeddings_matrix, unknown_token, embedding_size


================================================
FILE: experiments/tf_trainer/common/token_embedding_index_test.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tfrecord_input."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tf_trainer.common.token_embedding_index import LoadTokenIdxEmbeddings


class LoadTokenIdxEmbeddingsTest(tf.test.TestCase):
  """Tests LoadTokenIdxEmbeddings against the one-hot cats-and-dogs vocab."""

  def test_LoadTokenIdxEmbeddings(self):
    idx, embeddings, unknown_idx, embedding_size = LoadTokenIdxEmbeddings(
        'testdata/cats_and_dogs_onehot.vocab.txt')
    self.assertEqual(embedding_size, 6)
    self.assertEqual(unknown_idx, 7)
    # One padding row, one row per word and one unknown-token row.
    self.assertEqual(embeddings.shape, (unknown_idx + 1, embedding_size))
    self.assertEqual(idx['dogs'], 1)
    self.assertEqual(idx['cats'], 2)
    self.assertEqual(idx['not'], 6)
    self.assertEqual(embeddings[1][0], 1.0)
    self.assertEqual(embeddings[1][1], 0.0)
    # Note: the padding embedding is random, and is index 0. The unknown token
    # embedding, at index n+1 (7 in this case), is the mean of the rows before
    # it, including the random padding row, so neither row can be compared
    # against fixed values.

if __name__ == '__main__':
  tf.test.main()


================================================
FILE: experiments/tf_trainer/common/types.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Types for the tf_trainer module."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from typing import NewType, Union, Dict, Tuple

# Type aliases for convenience.

# A single dense or sparse tensor.
Tensor = Union[tf.Tensor, tf.SparseTensor]
# Tensors keyed by name, e.g. a features or labels dict.
TensorDict = Dict[str, Tensor]
# Either one dense tensor or a dict of named tensors.
TensorOrTensorDict = Union[tf.Tensor, TensorDict]
# A (features, labels) pair.
FeatureAndLabelTensors = Tuple[TensorOrTensorDict, TensorOrTensorDict]

# Either a (features, labels) pair or a dataset.
# See: https://www.tensorflow.org/api_docs/python/tf/estimator/TrainSpec
EstimatorInput = Union[FeatureAndLabelTensors, tf.data.Dataset]


================================================
FILE: experiments/tf_trainer/common/v100_config.yaml
================================================
# Training job configuration requesting a custom scale tier with a
# standard_v100 master. Presumably passed via --config to
# `gcloud ml-engine jobs submit training` - confirm against the run scripts.
trainingInput:
  pythonVersion: '3.5'
  scaleTier: CUSTOM
  masterType: standard_v100


================================================
FILE: experiments/tf_trainer/tf_char_cnn/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/tf_char_cnn/hparam_config.yaml
================================================
# Generic hyperparameter tuning configuration for the tf_char_cnn trainer.
# The parameter names below match command-line flags defined in
# tf_trainer/tf_char_cnn/model.py and tf_trainer/common/tfrecord_input.py.
trainingInput:
  pythonVersion: '3.5'
  # Alternative custom cluster setup, disabled in favour of BASIC_GPU below.
  # scaleTier: CUSTOM
  # masterType: standard
  # workerType: standard_gpu
  # parameterServerType: large_model
  # workerCount: 1
  # parameterServerCount: 1
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/toxicity # TODO: change based on dataset
    maxTrials: 120
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      # NOTE(review): the dataset-specific configs in this directory cap
      # dropout_rate at 0.7; an upper bound of 1 allows rates that drop
      # (almost) every activation - confirm this is intended.
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: INTEGER
        minValue: 16
        maxValue: 256
        scaleType: UNIT_LOG_SCALE
      # Comma delimited strings; the model splits them into lists of ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 32
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_char_cnn/hparam_config_civil_comments.yaml
================================================
# Hyperparameter tuning configuration for tf_char_cnn on civil_comments.
# Selected by run.hyperparameter.sh when its first argument is
# "civil_comments". The parameter names match the trainer's command-line flags.
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    # Metric to maximize, named "auc/<label>".
    hyperparameterMetricTag: auc/toxicity
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 64
        - 128
        - 256
      # Comma delimited strings; the model splits them into lists of ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_char_cnn/hparam_config_many_communities.yaml
================================================
# Hyperparameter tuning configuration for tf_char_cnn on many_communities.
# Selected by run.hyperparameter.sh when its first argument is
# "many_communities". The parameter names match the trainer's command-line
# flags.
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    # Metric to maximize, named "auc/<label>".
    hyperparameterMetricTag: auc/removed
    maxTrials: 150
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 64
        - 128
        - 256
      # Comma delimited strings; the model splits them into lists of ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_char_cnn/hparam_config_toxicity.yaml
================================================
# Hyperparameter tuning configuration for tf_char_cnn on toxicity.
# Selected by run.hyperparameter.sh when its first argument is "toxicity".
# The parameter names match the trainer's command-line flags.
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    # Metric to maximize, named "auc/<label>".
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      # Comma delimited strings; the model splits them into lists of ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_char_cnn/model.py
================================================
"""Tensorflow Estimator Character CNN."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.keras import layers
from tf_trainer.common import base_model
from typing import Set

FLAGS = tf.app.flags.FLAGS

# Hyperparameters
# These flags are collected into an HParams object by TFCharCNNModel.hparams().
# TODO: Add validation
tf.app.flags.DEFINE_float('learning_rate', 0.0001,
                          'The learning rate to use during training.')
tf.app.flags.DEFINE_float('dropout_rate', 0.25,
                          'The dropout rate to use during training.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
# TODO: add link to relevant public issue/bug/documentation?
# One convolutional layer is created per listed filter size.
tf.app.flags.DEFINE_string(
    'filter_sizes', '5,7,9,11',
    'Comma delimited string for the sizes of convolution filters.')
tf.app.flags.DEFINE_integer(
    'num_filters', 16,
    'Number of convolutional filters for every convolutional layer.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
# TODO: add link to relevant public issue/bug/documentation?
# One dense layer is created per listed unit count.
tf.app.flags.DEFINE_string(
    'dense_units', '256,512,128',
    'Comma delimited string for the number of hidden units in the dense layer.')
tf.app.flags.DEFINE_integer(
    'embedding_size', 32,
    'The number of dimensions in the character embedding.')
# Must be 'average' or 'max'; any other value makes the model raise ValueError.
tf.app.flags.DEFINE_string('pooling_type', 'max', 'Average or max pooling.')
tf.app.flags.DEFINE_integer('string_len', 1500,
                            'The length to truncate or pad to.')


class TFCharCNNModel(base_model.BaseModel):
  """TF Character CNN Model

  TF implementation of a Character CNN. Inputs should be strings. Each input
  string is turned into a fixed-length sequence of byte ids, embedded, passed
  through a stack of 1D convolutions, pooled over the sequence and fed to
  dense layers, with one binary classification head per target label.
  """

  def __init__(self, target_labels: Set[str]) -> None:
    """Stores the names of the labels to predict, one output head per label."""
    self._target_labels = target_labels

  @staticmethod
  def hparams():
    """Builds the HParams object from the command-line flags.

    The comma delimited filter_sizes and dense_units flags are converted into
    lists of ints.
    """
    filter_sizes = [int(units) for units in FLAGS.filter_sizes.split(',')]
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        dropout_rate=FLAGS.dropout_rate,
        filter_sizes=filter_sizes,
        num_filters=FLAGS.num_filters,
        dense_units=dense_units,
        embedding_size=FLAGS.embedding_size,
        pooling_type=FLAGS.pooling_type,
        string_len=FLAGS.string_len)
    return hparams

  def estimator(self, model_dir):
    """Returns an Estimator for this model that writes to model_dir."""
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model_fn building the character CNN graph.

    Args:
      features: dict holding the batch of input strings under
        base_model.TEXT_FEATURE_KEY.
      labels: labels passed through to the multi-head.
      mode: a tf.estimator.ModeKeys value.
      params: the HParams produced by hparams().
      config: the RunConfig; unused.

    Returns:
      The EstimatorSpec created by the multi-head.

    Raises:
      ValueError: if params.pooling_type is neither 'average' nor 'max'.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # One embedding row per possible byte value.
    embedding = tf.Variable(
        tf.truncated_normal([256, params.embedding_size]),
        name='char_embedding')
    texts = features[base_model.TEXT_FEATURE_KEY]
    batch_size = tf.shape(texts)[0]
    # Split every string into single bytes, fill shorter rows with '\0' and
    # decode each byte into its integer id.
    byte_ids = tf.reshape(
        tf.cast(
            tf.decode_raw(
                tf.sparse_tensor_to_dense(
                    tf.string_split(texts, ''), default_value='\0'), tf.uint8),
            tf.int32), [batch_size, -1])
    # Force a fixed length: append string_len zeros to every row, then keep
    # only the first string_len ids (pads short rows, truncates long ones).
    padded_ids = tf.slice(
        tf.concat(
            [byte_ids,
             tf.zeros([batch_size, params.string_len], tf.int32)],
            axis=1), [0, 0], [batch_size, params.string_len])

    inputs = tf.nn.embedding_lookup(params=embedding, ids=padded_ids)

    # Conv
    X = inputs
    for filter_size in params.filter_sizes:
      X = layers.Conv1D(
          params.num_filters, filter_size, activation='relu', padding='same')(
              X)
    if params.pooling_type == 'average':
      X = layers.GlobalAveragePooling1D()(X)
    elif params.pooling_type == 'max':
      X = layers.GlobalMaxPooling1D()(X)
    else:
      raise ValueError('Unrecognized pooling type parameter')

    # FC
    logits = X
    for num_units in params.dense_units:
      logits = tf.layers.dense(
          inputs=logits, units=num_units, activation=tf.nn.relu)
      # tf.layers.dropout defaults to training=False, which makes it a no-op;
      # the flag must be passed explicitly for dropout to apply in training.
      logits = tf.layers.dropout(
          logits, rate=params.dropout_rate, training=is_training)

    # One logit per target label, consumed by the matching head below.
    logits = tf.layers.dense(
        inputs=logits, units=len(self._target_labels), activation=None)

    output_heads = [
        tf.contrib.estimator.binary_classification_head(name=name)
        for name in self._target_labels
    ]
    multihead = tf.contrib.estimator.multi_head(output_heads)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return multihead.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)


================================================
FILE: experiments/tf_trainer/tf_char_cnn/run.deploy.sh
================================================
#!/bin/bash
# Deploys a saved model on Cloud MLE.
#
# Usage: run.deploy.sh <civil_comments|toxicity|many_communities>

if [ "$1" == "civil_comments" ] || [ "$1" == "toxicity" ] || [ "$1" == "many_communities" ] ; then
    MODEL_NAME="tf_char_cnn_$1"
else
    # Usage errors go to stderr so they are not mistaken for regular output.
    echo "First positional arg must be one of civil_comments, toxicity, many_communities." >&2
    exit 1
fi

MODEL_RUNS_DIR="gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/"

# By default, the model is the last one from the user.
MODEL_SAVED_PATH=$(gcloud storage ls "${MODEL_RUNS_DIR}" | tail -1)

# Without this check an empty path would be passed to --origin below and
# gcloud would consume the next option as its value.
if [ -z "${MODEL_SAVED_PATH}" ]; then
    echo "No saved model found under ${MODEL_RUNS_DIR}" >&2
    exit 1
fi

# Create a new model.
# Will raise an error if the model already exists.
gcloud ml-engine models create "${MODEL_NAME}" \
  --regions us-central1

# Deploy a model version.
MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S")
gcloud ml-engine versions create "${MODEL_VERSION}" \
  --model "${MODEL_NAME}" \
  --origin "${MODEL_SAVED_PATH}" \
  --runtime-version 1.10


================================================
FILE: experiments/tf_trainer/tf_char_cnn/run.hyperparameter.sh
================================================
#!/bin/bash
# Submits a hyperparameter tuning job for the tf_char_cnn model.
#
# Usage: run.hyperparameter.sh <dataset>
# The first argument selects tf_trainer/tf_char_cnn/hparam_config_<dataset>.yaml
# and is also used in the job name and job directory. It is not validated here.
# Paths are relative, so run this from the directory containing tf_trainer/.

# Presumably defines MODEL_PARENT_DIR and the dataset variables used below
# (train_path, valid_path, labels, ...) - verify against dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_char_cnn"
MODEL_NAME_DATA=${MODEL_NAME}_$1
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

# Arguments after the bare "--" are passed to the trainer module itself.
gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.10 \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --region=us-east1 \
    --verbosity=debug \
    --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --embedding_size=300 \
    --model_dir="${JOB_DIR}/model_dir" \
    --is_embedding_trainable=False \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --preprocess_in_tf=False

echo "Model dir:"
echo ${JOB_DIR}/model_dir


================================================
FILE: experiments/tf_trainer/tf_char_cnn/run.local.sh
================================================
#!/bin/bash
# Trains the tf_char_cnn model locally, writing to tf_char_cnn_local_model_dir.
# Paths are relative, so run this from the directory containing tf_trainer/.

# Presumably defines the dataset variables used below (train_path, valid_path,
# labels, label_dtypes) - verify against dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"

python -m tf_trainer.tf_char_cnn.run \
  --train_path=$train_path \
  --validate_path=$valid_path \
  --model_dir="tf_char_cnn_local_model_dir" \
  --labels=$labels \
  --label_dtypes=$label_dtypes


================================================
FILE: experiments/tf_trainer/tf_char_cnn/run.ml_engine.sh
================================================
#!/bin/bash
# Submits a single tf_char_cnn training job on a BASIC_GPU scale tier.
#
# Usage: run.ml_engine.sh <dataset>
# The first argument is used in the job name and job directory.
# Paths are relative, so run this from the directory containing tf_trainer/.

# Presumably defines MODEL_PARENT_DIR and the dataset variables used below
# (train_path, valid_path, labels, ...) - verify against dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_char_cnn"
# NOTE(review): with the "_glove" suffix, jobs are written under
# tf_char_cnn_<dataset>_glove, whereas run.deploy.sh in this directory looks
# under tf_char_cnn_<dataset> - confirm the suffix is intended here.
MODEL_NAME_DATA=${MODEL_NAME}_$1_glove
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"


# Arguments after the bare "--" are passed to the trainer module itself.
gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.10 \
    --scale-tier 'BASIC_GPU' \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --python-version "3.5" \
    --region=us-east1 \
    --verbosity=debug \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --model_dir="${JOB_DIR}/model_dir" \
    --is_embedding_trainable=False \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --preprocess_in_tf=False \
    --batch_size=32

echo "Model dir:"
echo ${JOB_DIR}/model_dir


================================================
FILE: experiments/tf_trainer/tf_char_cnn/run.py
================================================
"""Experiments with toxicity, civil_comments, many_communities datasets."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import tfrecord_input
from tf_trainer.tf_char_cnn import model as tf_char_cnn

FLAGS = tf.app.flags.FLAGS


def main(argv):
  """Trains and evaluates the character CNN, then exports it.

  Args:
    argv: Command-line arguments handed over by tf.app.run; ignored.
  """
  del argv  # unused

  records = tfrecord_input.TFRecordInput()
  char_cnn = tf_char_cnn.TFCharCNNModel(records.labels())

  runner = model_trainer.ModelTrainer(records, char_cnn)
  runner.train_with_eval()

  export_input_fn = serving_input.create_text_serving_input_fn(
      text_feature_name=base_model.TEXT_FEATURE_KEY,
      example_key_name=base_model.EXAMPLE_KEY)
  # The metrics key passed to export is the AUC of the first label in --labels.
  first_label = FLAGS.labels.split(',')[0]
  runner.export(
      export_input_fn,
      base_model.EXAMPLE_KEY,
      metrics_key="auc/%s" % first_label)


if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_cnn/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/tf_cnn/finetune.py
================================================
"""Experiments with many_communities dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import nltk
import os
import pandas as pd
import tensorflow as tf

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import text_preprocessor
from tf_trainer.common import tfrecord_input
from tf_trainer.common import types
from tf_trainer.tf_cnn import model as tf_cnn

from tensorflow.python.lib.io import file_io

FLAGS = tf.app.flags.FLAGS

# Embeddings file handed to TextPreprocessor in main().
tf.app.flags.DEFINE_string("embeddings_path",
                           "local_data/glove.6B/glove.6B.100d.txt",
                           "Path to the embeddings file.")

# CSV that main() appends to (mode 'a+', no header), one call per community.
tf.app.flags.DEFINE_string("tmp_results_path", None,
                           "Path to the local combined (across communities) results file.")

# "warm_start_from" is not defined in this file; presumably it is declared by
# one of the imported tf_trainer.common modules -- confirm.
tf.app.flags.mark_flag_as_required("warm_start_from")
tf.app.flags.mark_flag_as_required("tmp_results_path")

def main(argv):
  """Fine-tunes a warm-started CNN on one community and logs dev predictions.

  Trains from the --warm_start_from checkpoint, predicts on the dev set and
  appends one row per example (label, prediction, community) to the CSV at
  --tmp_results_path. The true labels come from the CSV that sits next to the
  validation TFRecord ("..tfrecord" replaced by ".csv"), and the community
  name is the part of the validation file name before the first "..".

  Args:
    argv: Command-line arguments handed over by tf.app.run; ignored.
  """
  del argv  # unused

  preprocessor = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

  nltk.download("punkt")
  tokenize_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
  dataset = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=tokenize_fn)

  # TODO: Move embedding *into* Keras model.
  model = preprocessor.add_embedding_to_model(
      tf_cnn.TFCNNModel(dataset.labels()), base_model.TOKENS_FEATURE_KEY)

  trainer = model_trainer.ModelTrainer(
      dataset, model, warm_start_from=FLAGS.warm_start_from)
  trainer.train_with_eval()

  prob_key = ("label", "probabilities")
  predictions = list(trainer.predict_on_dev(predict_keys=[prob_key]))

  query_path = FLAGS.validate_path
  query_df = pd.read_csv(query_path.replace("..tfrecord", ".csv"))
  labels = query_df["label"].values
  community = os.path.basename(query_path).split("..")[0]

  assert len(labels) == len(predictions), \
    "Labels and predictions must have the same length."

  results = pd.DataFrame(data={
      "label": labels,
      # Index 1 of the probabilities vector (presumably the positive class).
      "prediction": [pred[prob_key][1] for pred in predictions],
      "community": [community] * len(predictions),
  })
  results.to_csv(path_or_buf=FLAGS.tmp_results_path, mode='a+',
                 index=False, header=False)

if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_cnn/finetune.sh
================================================
#!/bin/bash
# Fine-tuning baseline for tf_cnn on the many_communities_40_per_8_shot data.
#
# Usage: finetune.sh [test]
#   With "test", runs the single best (learning rate, train steps) pair on the
#   test episodes; otherwise sweeps a small grid on the validation episodes.
#
# For every (learning rate, train steps) pair, the model is fine-tuned from
# $warm_start_from once per support-episode TFRecord, each run appends its
# predictions on the matching query episode to one local CSV, and that CSV is
# then copied to $combined_results_dir.

BASE_PATH="gs://conversationai-models"
GCS_RESOURCES="${BASE_PATH}/resources"

warm_start_from="gs://conversationai-models/tf_trainer_runs/msushkov/tf_cnn_many_communities_40_per_8_shot_glove/20190723_110543/model_dir"
eval_steps=1
eval_period=5

labels="label"
label_dtypes="int"
text_feature="text"

batch_size=24
dense_units="64,64"
filter_sizes="3,4,5"
num_filters=128
dropout_rate=0.33976339995062715
pooling_type="max"

if [ "$1" == "test" ]; then
    VALIDATION_OR_TEST="test"

    # Best hparams found on the validation set
    learning_rate_lst=(0.00035725183171118115)
    train_steps_lst=(5)

else
    VALIDATION_OR_TEST="validation"

    # original, original/2, original/5, original/10, original*2
    learning_rate_lst=(0.00035725183171118115 0.00017862591 0.00007145036 0.000035725183171118115 0.00071450366)
    train_steps_lst=(5 10 50)
fi

combined_results_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/$VALIDATION_OR_TEST"
train_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/${VALIDATION_OR_TEST}_episodes/support/*.tfrecord"

for learning_rate in "${learning_rate_lst[@]}"; do
    echo "Learning rate: $learning_rate"

    for train_steps in "${train_steps_lst[@]}"; do
        echo "Train steps: $train_steps"

        tmp_results_fname="tf_cnn_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv"
        tmp_results_path="/tmp/$tmp_results_fname"

        # finetune.py appends to this file, so start each combination clean.
        # -f keeps the first run (no leftover file yet) from printing an error.
        rm -f "$tmp_results_path"

        COUNTER=0
        # The glob is quoted so it reaches gcloud unexpanded by the local shell.
        for train_path in `gcloud storage ls "$train_dir"`; do
            echo "Community $COUNTER out of 170..."

            # The query episode lives at the same path with support -> query.
            valid_path=${train_path/${VALIDATION_OR_TEST}_episodes\/support/${VALIDATION_OR_TEST}_episodes\/query}

            # Every community starts from the warm-start checkpoint, not from
            # the previous community's fine-tuned weights.
            rm -rf "tf_cnn_local_model_dir"

            python -m tf_trainer.tf_cnn.finetune \
                --model_dir="tf_cnn_local_model_dir" \
                --train_path=$train_path \
                --validate_path=$valid_path \
                --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
                --is_embedding_trainable=False \
                --train_steps=$train_steps \
                --eval_period=$eval_period \
                --eval_steps=$eval_steps \
                --labels=$labels \
                --label_dtypes=$label_dtypes \
                --preprocess_in_tf=False \
                --batch_size=$batch_size \
                --dense_units=$dense_units \
                --filter_sizes=$filter_sizes \
                --num_filters=$num_filters \
                --dropout_rate=$dropout_rate \
                --learning_rate=$learning_rate \
                --pooling_type=$pooling_type \
                --text_feature=$text_feature \
                --warm_start_from=$warm_start_from \
                --tmp_results_path=$tmp_results_path

            COUNTER=$((COUNTER + 1))
        done

        gcloud storage cp "$tmp_results_path" "$combined_results_dir"

    done
done

================================================
FILE: experiments/tf_trainer/tf_cnn/hparam_config.yaml
================================================
# Hyperparameter tuning configuration for the tf_cnn trainer: maximizes the
# metric tagged auc/toxicity over at most 120 trials, 10 at a time.
# learning_rate, dropout_rate, filter_sizes, num_filters, dense_units and
# pooling_type match flags defined in tf_cnn/model.py; batch_size is presumably
# a flag of the shared trainer code (confirm).
# NOTE(review): run.hyperparameter.sh selects hparam_config_$1.yaml, so this
# unsuffixed file does not appear to be picked up by that script.
trainingInput:
  pythonVersion: '3.5'
  # scaleTier: CUSTOM
  # masterType: standard
  # workerType: standard_gpu
  # parameterServerType: large_model
  # workerCount: 1
  # parameterServerCount: 1
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/toxicity # TODO: change based on dataset
    maxTrials: 120
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      # NOTE(review): the dataset-specific configs cap dropout_rate at 0.7;
      # this one allows values up to 1.
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: INTEGER
        minValue: 16
        maxValue: 256
        scaleType: UNIT_LOG_SCALE
      # Comma-delimited strings: the matching flags are string flags that the
      # model splits on ',' and converts to ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 32
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      # The model accepts only 'average' or 'max' and raises otherwise.
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_cnn/hparam_config_civil_comments.yaml
================================================
# Hyperparameter tuning configuration for tf_cnn on civil_comments: maximizes
# the metric tagged auc/toxicity over at most 100 trials, 10 at a time.
# run.hyperparameter.sh selects this file when called with "civil_comments".
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/toxicity
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 64
        - 128
        - 256
      # Comma-delimited strings: the matching flags are string flags that the
      # model splits on ',' and converts to ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_cnn/hparam_config_many_communities.yaml
================================================
# Hyperparameter tuning configuration for tf_cnn on many_communities: maximizes
# the metric tagged auc/removed over at most 150 trials, 10 at a time.
# run.hyperparameter.sh selects this file when called with "many_communities".
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/removed
    maxTrials: 150
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 64
        - 128
        - 256
      # Comma-delimited strings: the matching flags are string flags that the
      # model splits on ',' and converts to ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_cnn/hparam_config_many_communities_40_per_8_shot.yaml
================================================
# Hyperparameter tuning configuration for tf_cnn on
# many_communities_40_per_8_shot: maximizes the metric tagged auc/label over at
# most 150 trials, 10 at a time. run.hyperparameter.sh selects this file when
# called with "many_communities_40_per_8_shot".
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/label
    maxTrials: 150
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 32
        - 64
      # Comma-delimited strings: the matching flags are string flags that the
      # model splits on ',' and converts to ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_cnn/hparam_config_toxicity.yaml
================================================
# Hyperparameter tuning configuration for tf_cnn on toxicity: maximizes the
# metric tagged auc/frac_neg over at most 100 trials, 10 at a time.
# run.hyperparameter.sh selects this file when called with "toxicity".
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      # Comma-delimited strings: the matching flags are string flags that the
      # model splits on ',' and converts to ints.
      - parameterName: filter_sizes 
        type: CATEGORICAL 
        categoricalValues:
        - '5,5'
        - '3,4,5'
      - parameterName: num_filters
        type: DISCRETE
        discreteValues:
        - 64
        - 128
      - parameterName: dense_units 
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: pooling_type 
        type: CATEGORICAL
        categoricalValues:
        - 'average'
        - 'max'

================================================
FILE: experiments/tf_trainer/tf_cnn/model.py
================================================
"""Tensorflow Estimator CNN."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.keras import layers
from tf_trainer.common import base_model
from typing import Set

FLAGS = tf.app.flags.FLAGS

# Hyperparameters
# These flags are collected into an HParams object by TFCNNModel.hparams().
# TODO: Add validation
tf.app.flags.DEFINE_float('learning_rate', 0.00003,
                          'The learning rate to use during training.')
tf.app.flags.DEFINE_float('dropout_rate', 0.3,
                          'The dropout rate to use during training.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
# TODO: add link to relevant public issue/bug/documentation?
# One convolutional layer is built per listed size, e.g. '3,4,5'.
tf.app.flags.DEFINE_string(
    'filter_sizes', '5',
    'Comma delimited string for the sizes of convolution filters.')
tf.app.flags.DEFINE_integer(
    'num_filters', 128,
    'Number of convolutional filters for every convolutional layer.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
# TODO: add link to relevant public issue/bug/documentation?
# One dense layer is built per listed size, e.g. '128,128'.
tf.app.flags.DEFINE_string(
    'dense_units', '128',
    'Comma delimited string for the number of hidden units in the dense layer.')
# NOTE(review): embedding_size is copied into the hparams but is not read by
# the model function in this file.
tf.app.flags.DEFINE_integer('embedding_size', 300,
                            'The number of dimensions in the word embedding.')
# Must be 'average' or 'max'; the model function raises ValueError otherwise.
tf.app.flags.DEFINE_string('pooling_type', 'average', 'Average or max pooling.')


class TFCNNModel(base_model.BaseModel):
  """TF CNN Model

  TF implementation of a CNN. Inputs should be
  sequences of word embeddings.
  """

  def __init__(self, target_labels: Set[str]) -> None:
    """Stores the label names; one binary head is built per label.

    Args:
      target_labels: Names of the labels to predict.
    """
    self._target_labels = target_labels

  @staticmethod
  def hparams():
    """Builds the hyperparameters from the command-line flags.

    Returns:
      A tf.contrib.training.HParams holding learning_rate, dropout_rate,
      filter_sizes, num_filters, dense_units, embedding_size and pooling_type.
      filter_sizes and dense_units are parsed from their comma-delimited
      string flags into lists of ints.

    Raises:
      ValueError: If an entry of --filter_sizes or --dense_units is not an
        integer.
    """
    filter_sizes = [int(units) for units in FLAGS.filter_sizes.split(',')]
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        dropout_rate=FLAGS.dropout_rate,
        filter_sizes=filter_sizes,
        num_filters=FLAGS.num_filters,
        dense_units=dense_units,
        embedding_size=FLAGS.embedding_size,
        pooling_type=FLAGS.pooling_type)
    return hparams

  def estimator(self, model_dir):
    """Returns a tf.estimator.Estimator for this model.

    Args:
      model_dir: Directory handed to the estimator's RunConfig.
    """
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model function: stacked Conv1D, global pooling, dense layers.

    The convolutions are applied one after another (one per entry in
    params.filter_sizes), followed by global average or max pooling, one
    ReLU dense layer plus dropout per entry in params.dense_units, and a
    final linear layer emitting one logit per target label.

    Args:
      features: Dict of input tensors; base_model.TOKENS_FEATURE_KEY is read.
      labels: Labels forwarded to the multi-head.
      mode: A tf.estimator.ModeKeys value.
      params: The HParams built by hparams().
      config: Unused; part of the Estimator model_fn signature.

    Returns:
      The estimator spec created by a multi-head with one binary
      classification head per target label, optimized with Adam.

    Raises:
      ValueError: If params.pooling_type is neither 'average' nor 'max'.
    """
    inputs = features[base_model.TOKENS_FEATURE_KEY]
    # tf.layers.dropout defaults to training=False, which makes it a no-op;
    # it has to be told explicitly when the graph is built for training.
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Conv
    X = inputs
    for filter_size in params.filter_sizes:
      X = layers.Conv1D(
          params.num_filters, filter_size, activation='relu', padding='same')(
              X)
    if params.pooling_type == 'average':
      X = layers.GlobalAveragePooling1D()(X)
    elif params.pooling_type == 'max':
      X = layers.GlobalMaxPooling1D()(X)
    else:
      raise ValueError('Unrecognized pooling type parameter')

    # FC
    logits = X
    for num_units in params.dense_units:
      logits = tf.layers.dense(
          inputs=logits, units=num_units, activation=tf.nn.relu)
      logits = tf.layers.dropout(
          logits, rate=params.dropout_rate, training=is_training)

    logits = tf.layers.dense(
        inputs=logits, units=len(self._target_labels), activation=None)

    output_heads = [
        tf.contrib.estimator.binary_classification_head(name=name)
        for name in self._target_labels
    ]
    multihead = tf.contrib.estimator.multi_head(output_heads)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return multihead.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)


================================================
FILE: experiments/tf_trainer/tf_cnn/run.deploy.sh
================================================
#!/bin/bash
# Deploys a saved model on Cloud MLE.
#
# Usage: run.deploy.sh <civil_comments|toxicity|many_communities>
#
# Picks the last entry listed under the current user's runs of
# tf_cnn_<dataset>_glove, creates the ML Engine model and adds a timestamped
# version that points at that entry.

if [ "$1" == "civil_comments" ] || [ "$1" == "toxicity" ] || [ "$1" == "many_communities" ] ; then
    
    MODEL_NAME=tf_cnn_$1_glove

else
    echo "First positional arg must be one of civil_comments, toxicity, many_communities."
    exit 1
fi


# By default, the model is the last one from the user.
MODEL_SAVED_PATH=$(gcloud storage ls gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/ | tail -1)

# Stop before creating anything when there is no run to deploy; otherwise the
# model would be created and the version step would fail on an empty --origin.
if [ -z "$MODEL_SAVED_PATH" ]; then
    echo "No saved model found under gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/."
    exit 1
fi

# Create a new model.
# Will raise an error if the model already exists.
# The script deliberately carries on in that case so a version can still be
# added to the existing model.
gcloud ml-engine models create $MODEL_NAME \
  --regions us-central1

# Deploy a model version.
MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S")
gcloud ml-engine versions create $MODEL_VERSION \
  --model $MODEL_NAME \
  --origin "$MODEL_SAVED_PATH" \
  --runtime-version 1.10


================================================
FILE: experiments/tf_trainer/tf_cnn/run.hyperparameter.sh
================================================
#!/bin/bash
# Submits a tf_cnn hyperparameter tuning job to ML Engine.
#
# $1 is the dataset name: it is embedded in the job name and job directory and
# selects the tuning config tf_trainer/tf_cnn/hparam_config_$1.yaml.
# dataset_config.sh is presumably what defines $MODEL_PARENT_DIR,
# $GCS_RESOURCES, $train_path, $valid_path, $train_steps, $eval_period,
# $eval_steps, $labels, $label_dtypes and $text_feature (it is not shown
# here -- confirm).

source "tf_trainer/common/dataset_config.sh"
# Timestamp used to make both the job name and the job directory unique.
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_cnn"
MODEL_NAME_DATA=${MODEL_NAME}_$1_glove
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

# No --scale-tier or --python-version here: the YAML config supplies scaleTier
# and pythonVersion. Flags after the bare "--" go to the trainer module.
# NOTE(review): this uses runtime 1.12 while run.ml_engine.sh and
# run.deploy.sh use 1.10 -- confirm the mismatch is intended.
gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.12 \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --region=us-east1 \
    --verbosity=debug \
    --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
    --embedding_size=300 \
    --model_dir="${JOB_DIR}/model_dir" \
    --is_embedding_trainable=False \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --preprocess_in_tf=False \
    --text_feature=$text_feature

echo "Model dir:"
echo ${JOB_DIR}/model_dir


================================================
FILE: experiments/tf_trainer/tf_cnn/run.local.sh
================================================
#!/bin/bash
# Runs tf_trainer.tf_cnn.run as a local Python module with the 100-dimensional
# GloVe embeddings.
#
# The dataset config is sourced through a relative path, so invoke this script
# from the directory that contains tf_trainer/. dataset_config.sh is presumably
# what defines $GCS_RESOURCES, $train_path, $valid_path, $labels and
# $label_dtypes (it is not shown here -- confirm). The model directory is the
# relative path tf_cnn_local_model_dir.

source "tf_trainer/common/dataset_config.sh"

python -m tf_trainer.tf_cnn.run \
  --train_path=$train_path \
  --validate_path=$valid_path \
  --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt" \
  --model_dir="tf_cnn_local_model_dir" \
  --labels=$labels \
  --label_dtypes=$label_dtypes


================================================
FILE: experiments/tf_trainer/tf_cnn/run.ml_engine.sh
================================================
#!/bin/bash
# Submits a tf_cnn training job to ML Engine with per-dataset hyperparameters.
#
# Usage: run.ml_engine.sh <dataset> [optimistic|pessimistic]
#   <dataset> is one of civil_comments, toxicity, many_communities,
#   many_communities_40_per_8_shot. The second argument is required only for
#   many_communities_40_per_8_shot and chooses between two hyperparameter sets.
#
# dataset_config.sh is presumably what defines $MODEL_PARENT_DIR,
# $GCS_RESOURCES, $train_path, $valid_path, $labels, $label_dtypes and
# $text_feature (it is not shown here -- confirm).

source "tf_trainer/common/dataset_config.sh"
# Timestamp used to make both the job name and the job directory unique.
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_cnn"
MODEL_NAME_DATA=${MODEL_NAME}_$1_glove
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

if [ "$1" == "civil_comments" ]; then
    batch_size=128
    dense_units='128,128'
    filter_sizes='3,4,5'
    num_filters=128
    dropout_rate=0.01527361736403272
    learning_rate=0.0001932910006772403
    pooling_type='average'
    train_steps=50000
    eval_period=1000
    eval_steps=2000

elif [ "$1" == "toxicity" ]; then
    batch_size=128
    dense_units='64'
    filter_sizes='3,4,5'
    num_filters=128
    dropout_rate=0.59761635967002524
    learning_rate=0.00028233147441192243
    pooling_type='max'
    train_steps=55000
    eval_period=1000
    eval_steps=1500

elif [ "$1" == "many_communities" ]; then
    batch_size=128
    dense_units='128,128'
    filter_sizes='3,4,5'
    num_filters=128
    dropout_rate=0.42090135248508892
    learning_rate=8.8262915612024245e-05
    pooling_type='average'
    train_steps=700000
    eval_period=4000
    eval_steps=45000

elif [ "$1" == "many_communities_40_per_8_shot" ]; then

    train_steps=8000
    eval_steps=250
    eval_period=200

    if [ "$2" == "optimistic" ]; then

        batch_size=64
        dense_units='64'
        filter_sizes='3,4,5'
        num_filters=128
        dropout_rate=0.50444323963758519
        learning_rate=0.00016448334200861331
        pooling_type='max'

    elif [ "$2" == "pessimistic" ]; then
        
        batch_size=32
        dense_units='64,64'
        filter_sizes='3,4,5'
        num_filters=128
        dropout_rate=0.33976339995062715
        learning_rate=0.00035725183171118115
        pooling_type='max'

    else
        echo "Must provide second positional argument."
        exit 1
    fi

else
    echo "First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot."
    # "return" is only valid in a function or a sourced script; in an executed
    # script bash reports an error and keeps going, which would submit the job
    # below with unset hyperparameters. Exit instead, as the branch above does.
    exit 1
fi

# Everything after the bare "--" is forwarded to the trainer module as flags.
gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.10 \
    --scale-tier 'BASIC_GPU' \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --python-version "3.5" \
    --region=us-east1 \
    --verbosity=debug \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
    --model_dir="${JOB_DIR}/model_dir" \
    --is_embedding_trainable=False \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --preprocess_in_tf=False \
    --batch_size=$batch_size \
    --dense_units=$dense_units \
    --filter_sizes=$filter_sizes \
    --num_filters=$num_filters \
    --dropout_rate=$dropout_rate \
    --learning_rate=$learning_rate \
    --pooling_type=$pooling_type \
    --text_feature=$text_feature

echo "Model dir:"
echo ${JOB_DIR}/model_dir


================================================
FILE: experiments/tf_trainer/tf_cnn/run.py
================================================
"""Experiments with toxicity, civil_comments, many_communities datasets."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import nltk
import tensorflow as tf

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import text_preprocessor
from tf_trainer.common import tfrecord_input
from tf_trainer.common import types
from tf_trainer.tf_cnn import model as tf_cnn

FLAGS = tf.app.flags.FLAGS

# Embeddings file handed to TextPreprocessor in main(). The default points at
# the 100-dimensional GloVe file under local_data/.
tf.app.flags.DEFINE_string("embeddings_path",
                           "local_data/glove.6B/glove.6B.100d.txt",
                           "Path to the embeddings file.")

def main(argv):
  """Trains and evaluates the word-embedding CNN, then exports it.

  Args:
    argv: Command-line arguments handed over by tf.app.run; ignored.
  """
  del argv  # unused

  preprocessor = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

  nltk.download("punkt")
  tokenize_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
  dataset = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=tokenize_fn)

  # TODO: Move embedding *into* Keras model.
  cnn = tf_cnn.TFCNNModel(dataset.labels())
  model = preprocessor.add_embedding_to_model(
      cnn, base_model.TOKENS_FEATURE_KEY)

  trainer = model_trainer.ModelTrainer(dataset, model)
  trainer.train_with_eval()

  # The serving input function reuses the preprocessor's vocabulary mapping
  # and unknown token.
  export_input_fn = serving_input.create_serving_input_fn(
      word_to_idx=preprocessor._word_to_idx,
      unknown_token=preprocessor._unknown_token,
      text_feature_name=base_model.TOKENS_FEATURE_KEY,
      example_key_name=base_model.EXAMPLE_KEY)
  # The metrics key passed to export is the AUC of the first label in --labels.
  first_label = FLAGS.labels.split(',')[0]
  trainer.export(
      export_input_fn,
      base_model.EXAMPLE_KEY,
      metrics_key="auc/%s" % first_label)


if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_gru_attention/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/tf_gru_attention/finetune.py
================================================
"""Experiments with many_communities dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import nltk
import os
import pandas as pd
import tensorflow as tf

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import text_preprocessor
from tf_trainer.common import tfrecord_input
from tf_trainer.common import types
from tf_trainer.tf_gru_attention import model as tf_gru_attention

from tensorflow.python.lib.io import file_io

FLAGS = tf.app.flags.FLAGS

# Embeddings file handed to TextPreprocessor in main().
tf.app.flags.DEFINE_string("embeddings_path",
                           "local_data/glove.6B/glove.6B.100d.txt",
                           "Path to the embeddings file.")

# CSV that main() appends to (mode 'a+', no header), one call per community.
tf.app.flags.DEFINE_string("tmp_results_path", None,
                           "Path to the local combined (across communities) results file.")

# "warm_start_from" is not defined in this file; presumably it is declared by
# one of the imported tf_trainer.common modules -- confirm.
tf.app.flags.mark_flag_as_required("warm_start_from")
tf.app.flags.mark_flag_as_required("tmp_results_path")

def main(argv):
  """Fine-tunes a warm-started GRU attention model and logs dev predictions.

  Trains from the --warm_start_from checkpoint, predicts on the dev set and
  appends one row per example (label, prediction, community) to the CSV at
  --tmp_results_path. The true labels come from the CSV that sits next to the
  validation TFRecord ("..tfrecord" replaced by ".csv"), and the community
  name is the part of the validation file name before the first "..".

  Args:
    argv: Command-line arguments handed over by tf.app.run; ignored.
  """
  del argv  # unused

  preprocessor = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

  nltk.download("punkt")
  tokenize_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
  dataset = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=tokenize_fn)

  # TODO: Move embedding *into* Keras model.
  model = preprocessor.add_embedding_to_model(
      tf_gru_attention.TFRNNModel(dataset.labels()),
      base_model.TOKENS_FEATURE_KEY)

  trainer = model_trainer.ModelTrainer(
      dataset, model, warm_start_from=FLAGS.warm_start_from)
  trainer.train_with_eval()

  prob_key = ("label", "probabilities")
  predictions = list(trainer.predict_on_dev(predict_keys=[prob_key]))

  query_path = FLAGS.validate_path
  query_df = pd.read_csv(query_path.replace("..tfrecord", ".csv"))
  labels = query_df["label"].values
  community = os.path.basename(query_path).split("..")[0]

  assert len(labels) == len(predictions), \
    "Labels and predictions must have the same length."

  results = pd.DataFrame(data={
      "label": labels,
      # Index 1 of the probabilities vector (presumably the positive class).
      "prediction": [pred[prob_key][1] for pred in predictions],
      "community": [community] * len(predictions),
  })
  results.to_csv(path_or_buf=FLAGS.tmp_results_path, mode='a+',
                 index=False, header=False)

if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_gru_attention/finetune.sh
================================================
#!/bin/bash
# Fine-tuning baseline for tf_gru_attention on the
# many_communities_40_per_8_shot data.
#
# Usage: finetune.sh [test]
#   With "test", runs the single best (learning rate, train steps) pair on the
#   test episodes; otherwise sweeps a small grid on the validation episodes.
#
# For every (learning rate, train steps) pair, the model is fine-tuned from
# $warm_start_from once per support-episode TFRecord, each run appends its
# predictions on the matching query episode to one local CSV, and that CSV is
# then copied to $combined_results_dir.

BASE_PATH="gs://conversationai-models"
GCS_RESOURCES="${BASE_PATH}/resources"

warm_start_from="gs://conversationai-models/tf_trainer_runs/msushkov/tf_gru_attention_many_communities_40_per_8_shot_glove/20190723_110533/model_dir"
eval_steps=1
eval_period=5

labels="label"
label_dtypes="int"
text_feature="text"

batch_size=24
attention_units=64
dropout_rate=0.052541994248873507
dense_units='128,128'
gru_units='128'

if [ "$1" == "test" ]; then
    VALIDATION_OR_TEST="test"

    # Best hparams found on the validation set
    learning_rate_lst=(0.000049418814574477758)
    train_steps_lst=(50)

else
    VALIDATION_OR_TEST="validation"

    # original, original/2, original/5, original/10, original*2, original/20, original/50
    #learning_rate_lst=(0.00049418814574477758 0.00024709407 0.00009883762 0.000049418814574477758 0.00098837629 0.0000247094 0.00000988376)

    # original*4, original*10, original*20
    learning_rate_lst=(0.00197675258 0.0049418814574477758 0.00988376291)
    train_steps_lst=(5 10 50 100)
fi

combined_results_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/$VALIDATION_OR_TEST"
train_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/${VALIDATION_OR_TEST}_episodes/support/*.tfrecord"

for learning_rate in "${learning_rate_lst[@]}"; do
    echo "Learning rate:"
    echo $learning_rate

    for train_steps in "${train_steps_lst[@]}"; do
        echo "Train steps:"
        echo $train_steps

        tmp_results_fname="tf_gru_attention_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv"
        tmp_results_path="/tmp/$tmp_results_fname"

        # finetune.py appends to this file, so start each combination clean.
        # -f keeps the first run (no leftover file yet) from printing an error.
        rm -f "$tmp_results_path"

        COUNTER=0
        # The glob is quoted so it reaches gcloud unexpanded by the local shell.
        for train_path in `gcloud storage ls "$train_dir"`; do

            # The query episode lives at the same path with support -> query.
            valid_path=${train_path/${VALIDATION_OR_TEST}_episodes\/support/${VALIDATION_OR_TEST}_episodes\/query}

            # Every community starts from the warm-start checkpoint, not from
            # the previous community's fine-tuned weights.
            rm -rf "tf_gru_attention_local_model_dir"

            # NOTE(review): the tf_cnn counterpart of this script passes
            # glove.6B.300d.txt; confirm that 100d matches the embeddings the
            # warm-start checkpoint was trained with.
            python -m tf_trainer.tf_gru_attention.finetune \
                --model_dir="tf_gru_attention_local_model_dir" \
                --train_path=$train_path \
                --validate_path=$valid_path \
                --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt" \
                --is_embedding_trainable=False \
                --train_steps=$train_steps \
                --eval_period=$eval_period \
                --eval_steps=$eval_steps \
                --labels=$labels \
                --label_dtypes=$label_dtypes \
                --preprocess_in_tf=False \
                --batch_size=$batch_size \
                --attention_units=$attention_units \
                --dropout_rate=$dropout_rate \
                --learning_rate=$learning_rate \
                --dense_units=$dense_units \
                --gru_units=$gru_units \
                --text_feature=$text_feature \
                --warm_start_from=$warm_start_from \
                --tmp_results_path=$tmp_results_path

            COUNTER=$((COUNTER + 1))
        done

        gcloud storage cp "$tmp_results_path" "$combined_results_dir"

    done
done

================================================
FILE: experiments/tf_trainer/tf_gru_attention/hparam_config.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: CUSTOM
  masterType: standard
  workerType: standard_gpu
  parameterServerType: large_model
  workerCount: 1
  parameterServerCount: 1
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 40
    maxParallelTrials: 4
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: INTEGER
        minValue: 16
        maxValue: 128
        scaleType: UNIT_LOG_SCALE
      - parameterName: gru_units
        type: CATEGORICAL
        categoricalValues:
        - '256'            
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      # Candidate values for the --attention_units flag. 128 replaces the
      # earlier 124, which broke the power-of-two progression used by every
      # other tuning config for this model.
      - parameterName: attention_units
        type: DISCRETE
        discreteValues:
        - 32
        - 64
        - 128
        - 256
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'


================================================
FILE: experiments/tf_trainer/tf_gru_attention/hparam_config_civil_comments.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/toxicity
    maxTrials: 200
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
      - parameterName: gru_units
        type: CATEGORICAL
        categoricalValues:          
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: attention_units
        type: DISCRETE
        discreteValues:
        - 32
        - 64
        - 128
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'

================================================
FILE: experiments/tf_trainer/tf_gru_attention/hparam_config_many_communities.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/removed
    maxTrials: 200
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
      - parameterName: gru_units
        type: CATEGORICAL
        categoricalValues:         
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: attention_units
        type: DISCRETE
        discreteValues:
        - 32
        - 64
        - 128
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'

================================================
FILE: experiments/tf_trainer/tf_gru_attention/hparam_config_many_communities_40_per_8_shot.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/label
    maxTrials: 200
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 32
        - 64
      - parameterName: gru_units
        type: CATEGORICAL
        categoricalValues:         
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: attention_units
        type: DISCRETE
        discreteValues:
        - 32
        - 64
        - 128
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'

================================================
FILE: experiments/tf_trainer/tf_gru_attention/hparam_config_toxicity.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 200
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
      - parameterName: gru_units
        type: CATEGORICAL
        categoricalValues:           
        - '128'
        - '128,128'
        - '64'
        - '64,64'
      - parameterName: attention_units
        type: DISCRETE
        discreteValues:
        - 32
        - 64
        - 128
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '64'
        - '64,64'

================================================
FILE: experiments/tf_trainer/tf_gru_attention/model.py
================================================
"""Tensorflow Estimator implementation of RNN Model with Attention"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tf_trainer.common import base_model
from typing import Set

FLAGS = tf.app.flags.FLAGS

# Hyperparameters
# TODO: Add validation
tf.app.flags.DEFINE_float('learning_rate', 0.00003,
                          'The learning rate to use during training.')
tf.app.flags.DEFINE_float('dropout_rate', 0.3,
                          'The dropout rate to use during training.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
tf.app.flags.DEFINE_string(
    'gru_units', '128',
    'Comma delimited string for the number of hidden units in the gru layer.')
# Passed to attend() as `attention_size`, i.e. the width of the hidden layers
# of the attention scoring network.
tf.app.flags.DEFINE_integer(
    'attention_units', 64,
    'The number of hidden units in the attention layer.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
tf.app.flags.DEFINE_string(
    'dense_units', '128',
    'Comma delimited string for the number of hidden units in the dense layer.')


def attend(inputs, attention_size, attention_depth=1):
  """Pools a sequence into a single vector using learned attention weights.

  Every timestep is scored by a small feed-forward network. The scores are
  normalized with a softmax over the time axis and then used as weights for a
  weighted sum of `inputs` over time.

  Args:
    inputs: Tensor indexed as [batch, time, features]. The size of the last
      dimension must be statically known; the time dimension may be dynamic.
    attention_size: Number of units in each hidden (ReLU) layer of the scoring
      network. Only used when `attention_depth` is greater than 1.
    attention_depth: Total number of dense layers in the scoring network,
      counting the final single-unit linear layer. With the default of 1 the
      score is a single linear projection and no hidden layer is created.

  Returns:
    A tuple `(output, alphas)`: `output` is the attention-weighted sum over
    the time axis, shaped [batch, features]; `alphas` are the attention
    weights, shaped [batch, time, 1].
  """

  sequence_length = tf.shape(inputs)[1]  # dynamic
  final_layer_size = inputs.shape[2]  # static

  # Flatten batch and time so that each timestep is scored independently.
  x = tf.reshape(inputs, [-1, final_layer_size])
  for _ in range(attention_depth - 1):
    x = tf.layers.dense(x, attention_size, activation=tf.nn.relu)
  x = tf.layers.dense(x, 1, activation=None)
  logits = tf.reshape(x, [-1, sequence_length, 1])
  # Normalize across time (axis 1). `axis` replaces the deprecated `dim`
  # keyword of tf.nn.softmax.
  alphas = tf.nn.softmax(logits, axis=1)

  output = tf.reduce_sum(inputs * alphas, 1)

  return output, alphas


class TFRNNModel(base_model.BaseModel):
  """Stacked-GRU text classifier with attention pooling and dense layers.

  One binary classification head is created per target label; the heads are
  combined with `tf.contrib.estimator.multi_head`.
  """

  def __init__(self, target_labels: Set[str]) -> None:
    """Stores the names of the labels the model predicts.

    Args:
      target_labels: Label names; one binary head is built for each.
    """
    self._target_labels = target_labels

  @staticmethod
  def hparams():
    """Builds the hyperparameter set from the command-line flags.

    `gru_units` and `dense_units` arrive as comma-delimited strings and are
    converted to lists of ints (one entry per layer).

    Returns:
      A `tf.contrib.training.HParams` with learning_rate, dropout_rate,
      gru_units, attention_units and dense_units.
    """
    gru_units = [int(units) for units in FLAGS.gru_units.split(',')]
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        dropout_rate=FLAGS.dropout_rate,
        gru_units=gru_units,
        attention_units=FLAGS.attention_units,
        dense_units=dense_units)
    return hparams

  def estimator(self, model_dir):
    """Returns a `tf.estimator.Estimator` that stores its state in model_dir.

    Args:
      model_dir: Directory handed to the estimator's `RunConfig`.
    """
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model function: GRU stack -> attention -> dense -> heads.

    Args:
      features: Feature dict; `base_model.TOKENS_FEATURE_KEY` is fed straight
        into the RNN (assumes an already-embedded float tensor shaped
        [batch, time, embedding] - TODO confirm against the embedding
        wrapper).
      labels: Labels forwarded unchanged to the multi-head.
      mode: A `tf.estimator.ModeKeys` value.
      params: Hyperparameters as produced by `hparams()`.
      config: Estimator run configuration (unused).

    Returns:
      The `EstimatorSpec` created by the combined multi-head.
    """
    inputs = features[base_model.TOKENS_FEATURE_KEY]

    rnn_layers = [
        tf.nn.rnn_cell.GRUCell(num_units=size, activation=tf.nn.tanh)
        for size in params.gru_units
    ]

    # create a RNN cell composed sequentially of a number of RNNCells
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

    # TODO: make bidirectional
    outputs, _ = tf.nn.dynamic_rnn(multi_rnn_cell, inputs, dtype=tf.float32)

    # TODO: Handle sequence length in the attention layer (via a mask).
    #       Padded elements should not be part of the average.
    logits, _ = attend(inputs=outputs, attention_size=params.attention_units)

    # tf.layers.dropout defaults to training=False, which makes it an identity
    # op. The flag must be passed explicitly for dropout_rate to take effect
    # while training (and to stay disabled for eval/predict).
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    for num_units in params.dense_units:
      logits = tf.layers.dense(
          inputs=logits, units=num_units, activation=tf.nn.relu)
      logits = tf.layers.dropout(
          logits, rate=params.dropout_rate, training=is_training)
    # One logit per target label; the multi-head splits them across heads.
    logits = tf.layers.dense(
        inputs=logits, units=len(self._target_labels), activation=None)

    output_heads = [
        tf.contrib.estimator.binary_classification_head(name=name)
        for name in self._target_labels
    ]
    multihead = tf.contrib.estimator.multi_head(output_heads)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return multihead.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)


================================================
FILE: experiments/tf_trainer/tf_gru_attention/run.deploy.sh
================================================
#!/bin/bash
# Deploys a saved model on Cloud MLE.
#
# Usage: run.deploy.sh <civil_comments|toxicity|many_communities>

case "$1" in
    civil_comments|toxicity|many_communities)
        MODEL_NAME="tf_gru_attention_$1_glove"
        ;;
    *)
        echo "First positional arg must be one of civil_comments, toxicity, many_communities."
        exit 1
        ;;
esac

# By default, the model is the last one from the user (last entry of the
# listing of the user's run directory for this model).
MODEL_RUNS_DIR="gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/"
MODEL_SAVED_PATH=$(gcloud storage ls "$MODEL_RUNS_DIR" | tail -1)

# An empty listing would leave --origin without a value and garble the
# deploy command below, so stop early with a clear message instead.
if [ -z "$MODEL_SAVED_PATH" ]; then
    echo "No saved model found under ${MODEL_RUNS_DIR}" >&2
    exit 1
fi

# Create a new model.
# gcloud reports an error if the model already exists; the script carries on
# (there is no `set -e`) so that a new version can still be added to it.
gcloud ml-engine models create "$MODEL_NAME" \
  --regions us-central1

# Deploy a model version.
MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S")
gcloud ml-engine versions create "$MODEL_VERSION" \
  --model "$MODEL_NAME" \
  --origin "$MODEL_SAVED_PATH" \
  --runtime-version 1.10


================================================
FILE: experiments/tf_trainer/tf_gru_attention/run.hyperparameter.sh
================================================
#!/bin/bash
# Submits a hyperparameter tuning job for the tf_gru_attention model.
#
# Usage: run.hyperparameter.sh <dataset>
# <dataset> ($1) selects the tuning config
# tf_trainer/tf_gru_attention/hparam_config_<dataset>.yaml and is embedded in
# the job name and job directory.

# The variables train_path, valid_path, train_steps, eval_period, eval_steps,
# labels, label_dtypes, text_feature, MODEL_PARENT_DIR and GCS_RESOURCES are
# not set in this script; presumably they come from the sourced
# dataset_config.sh - verify there.
source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_gru_attention"
MODEL_NAME_DATA="${MODEL_NAME}_$1_glove"
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

# Arguments after the bare `--` are forwarded to the trainer module
# (tf_trainer.tf_gru_attention.run); the ones before it configure the job.
# Note: this script uses the 300d GloVe vectors with --embedding_size=300.
gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.12 \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --region=us-east1 \
    --verbosity=debug \
    --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
    --embedding_size=300 \
    --model_dir="${JOB_DIR}/model_dir" \
    --is_embedding_trainable=False \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --preprocess_in_tf=False \
    --text_feature=$text_feature

# Print where the trained model will be written.
echo "Model dir:"
echo ${JOB_DIR}/model_dir


================================================
FILE: experiments/tf_trainer/tf_gru_attention/run.local.sh
================================================
#!/bin/bash
# Trains the tf_gru_attention model locally with the 100d GloVe embeddings.

# Note:
# We currently use 2 different embeddings:
# - glove.6B/glove.6B.300d.txt
# - google-news/GoogleNews-vectors-negative300.txt
# Glove assumes all words are lowercased, while Google-news handles different casing.
# As there is currently no tf operation that performs lowercasing, we have the
# following requirements:
# - For google news: Run preprocess_in_tf=True (no lowercasing).
# - For glove.6B, Run preprocess_in_tf=False (will force lowercasing).

# train_path, valid_path, labels, label_dtypes, text_feature and GCS_RESOURCES
# are not set in this script; presumably they come from the sourced
# dataset_config.sh - verify there.
source "tf_trainer/common/dataset_config.sh"

# The model is written to a directory relative to the current working
# directory.
python -m tf_trainer.tf_gru_attention.run \
  --train_path=$train_path \
  --validate_path=$valid_path \
  --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt" \
  --model_dir="tf_gru_attention_local_model_dir" \
  --labels=$labels \
  --label_dtypes=$label_dtypes \
  --preprocess_in_tf=False \
  --text_feature=$text_feature

================================================
FILE: experiments/tf_trainer/tf_gru_attention/run.ml_engine.sh
================================================
#!/bin/bash
# This script runs one training job on Cloud MLE.

# Note:
# We currently use 2 different embeddings:
# - glove.6B/glove.6B.300d.txt
# - google-news/GoogleNews-vectors-negative300.txt
# Glove assumes all words are lowercased, while Google-news handles different casing.
# As there is currently no tf operation that performs lowercasing, we have the following
# requirements:
# - For google news: Run preprocess_in_tf=True (no lowercasing).
# - For glove.6B, Run preprocess_in_tf=False (will force lowercasing).

# Variables that are used further down but not assigned here (train_path,
# valid_path, labels, label_dtypes, MODEL_PARENT_DIR, GCS_RESOURCES, ...) are
# presumably provided by the sourced dataset_config.sh - verify there.
source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_gru_attention"
MODEL_NAME_DATA=${MODEL_NAME}_$1_glove
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

# Per-dataset hyperparameters, training schedule and machine config.
# $1 picks the dataset; many_communities_40_per_8_shot additionally needs
# $2 (optimistic | pessimistic) to choose between two hyperparameter sets.
if [ "$1" == "civil_comments" ]; then
    batch_size=128
    attention_units=32
    dropout_rate=0.60960359286224075
    learning_rate=0.0010256671195808884
    dense_units='128'
    gru_units='128,128'
    train_steps=50000
    eval_period=1000
    eval_steps=2000
    config="tf_trainer/common/basic_gpu_config.yaml"
    text_feature="comment_text"

elif [ "$1" == "toxicity" ]; then
    batch_size=32
    attention_units=32
    dropout_rate=0.69999994803861521
    learning_rate=0.00030340058446715442
    dense_units='128'
    gru_units='128,128'
    train_steps=250000
    eval_period=1000
    eval_steps=6000
    config="tf_trainer/common/basic_gpu_config.yaml"
    text_feature="comment_text"

elif [ "$1" == "many_communities" ]; then
    batch_size=128
    attention_units=32
    dropout_rate=0.38471142580880757
    learning_rate=0.000755324856537066
    dense_units='128'
    gru_units='128'
    train_steps=700000
    eval_period=4000
    eval_steps=45000
    config="tf_trainer/common/p100_config.yaml"
    text_feature="comment_text"

elif [ "$1" == "many_communities_40_per_8_shot" ]; then

    # text_feature is intentionally not overridden in this branch; whatever
    # value is already in scope is used.
    train_steps=8000
    eval_steps=250
    eval_period=200
    config="tf_trainer/common/basic_gpu_config.yaml"

    if [ "$2" == "optimistic" ]; then

        batch_size=64
        attention_units=32
        dropout_rate=0.69778643162683085
        learning_rate=0.00080291321858594659
        dense_units='128,128'
        gru_units='128'

    elif [ "$2" == "pessimistic" ]; then

        batch_size=32
        attention_units=64
        dropout_rate=0.052541994248873507
        learning_rate=0.00049418814574477758
        dense_units='128,128'
        gru_units='128'

    else
        echo "Must provide second positional argument."
        exit 1
    fi

else
    echo "First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot."
    # `return` is only valid inside a function or a sourced script; when this
    # file is executed it fails and execution would fall through to the job
    # submission with every hyperparameter unset. Exit instead, matching the
    # error branch above.
    exit 1
fi

# Submit the training job. Flags before the bare `--` configure the job
# itself; everything after it is forwarded to the trainer module
# (tf_trainer.tf_gru_attention.run). The hyperparameter and schedule values
# are the ones selected earlier in this script from the positional arguments.
gcloud ai-platform jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.10 \
    --config $config \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --region=us-east1 \
    --verbosity=debug \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt" \
    --model_dir="${JOB_DIR}/model_dir" \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --preprocess_in_tf=False \
    --batch_size=$batch_size \
    --attention_units=$attention_units \
    --dropout_rate=$dropout_rate \
    --learning_rate=$learning_rate \
    --dense_units=$dense_units \
    --gru_units=$gru_units \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --text_feature=$text_feature


================================================
FILE: experiments/tf_trainer/tf_gru_attention/run.py
================================================
"""Experiments with Toxicity Dataset"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import nltk
import tensorflow as tf

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import text_preprocessor
from tf_trainer.common import tfrecord_input
from tf_trainer.common import types
from tf_trainer.tf_gru_attention import model as tf_gru_attention


FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string("embeddings_path",
                           "local_data/glove.6B/glove.6B.100d.txt",
                           "Path to the embeddings file.")


def main(argv):
  """Trains, evaluates and exports the GRU attention model.

  Builds a text preprocessor from the embeddings file named by the
  `embeddings_path` flag, wraps the model so that it receives embedded
  tokens, runs training with evaluation, and finally exports the model using
  the AUC of the first label in the `labels` flag as the metrics key.

  Args:
    argv: Command-line arguments left over after flag parsing (unused).
  """
  del argv  # unused

  text_processor = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

  # Fetch the punkt tokenizer data before tokenizing with nltk.word_tokenize.
  nltk.download("punkt")
  tokenizing_fn = text_processor.train_preprocess_fn(nltk.word_tokenize)
  input_data = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=tokenizing_fn)

  # TODO: Move embedding *into* Keras model.
  rnn_model = tf_gru_attention.TFRNNModel(input_data.labels())
  embedded_model = text_processor.add_embedding_to_model(
      rnn_model, base_model.TOKENS_FEATURE_KEY)

  trainer = model_trainer.ModelTrainer(input_data, embedded_model)
  trainer.train_with_eval()

  # The serving graph re-uses the vocabulary built by the preprocessor.
  serving_input_fn = serving_input.create_serving_input_fn(
      word_to_idx=text_processor._word_to_idx,
      unknown_token=text_processor._unknown_token,
      text_feature_name=base_model.TOKENS_FEATURE_KEY,
      example_key_name=base_model.EXAMPLE_KEY)

  first_label = FLAGS.labels.split(',')[0]
  trainer.export(
      serving_input_fn,
      base_model.EXAMPLE_KEY,
      metrics_key="auc/%s" % first_label)


if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/finetune.py
================================================
"""Experiments with many_communities dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import tfrecord_input
from tf_trainer.tf_hub_classifier import model as tf_hub_classifier

import os
import pandas as pd
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

# Not read anywhere in this module; finetune.sh passes --embeddings_path, so
# the flag has to be declared for that invocation.
tf.app.flags.DEFINE_string("embeddings_path",
                           "local_data/glove.6B/glove.6B.100d.txt",
                           "Path to the embeddings file.")

# CSV file to which main() appends one row per validation example.
tf.app.flags.DEFINE_string("tmp_results_path", None,
                           "Path to the local combined (across communities) results file.")

# "warm_start_from" is not declared in this file; presumably it is defined by
# one of the imported tf_trainer modules - verify.
tf.app.flags.mark_flag_as_required("warm_start_from")
tf.app.flags.mark_flag_as_required("tmp_results_path")

def main(argv):
  """Fine-tunes the TF Hub classifier and appends its dev-set predictions.

  The model is warm-started from the `warm_start_from` flag, trained with
  evaluation, and then used to predict on the dev data. True labels are read
  from the CSV whose path is derived from the `validate_path` flag, and one
  (label, prediction, community) row per example is appended, without a
  header, to the file named by the `tmp_results_path` flag.

  Args:
    argv: Command-line arguments left over after flag parsing (unused).
  """
  del argv  # unused

  input_data = tfrecord_input.TFRecordInput()
  classifier = tf_hub_classifier.TFHubClassifierModel(input_data.labels())

  trainer = model_trainer.ModelTrainer(
      input_data, classifier, warm_start_from=FLAGS.warm_start_from)
  trainer.train_with_eval()

  probability_key = ("label", "probabilities")
  predictions = list(trainer.predict_on_dev(predict_keys=[probability_key]))

  # The labels live in a CSV next to the tfrecord: "<name>..tfrecord" maps to
  # "<name>.csv", and the part of the file name before ".." is the community.
  query_path = FLAGS.validate_path
  labels_frame = pd.read_csv(query_path.replace("..tfrecord", ".csv"))
  true_labels = labels_frame["label"].values
  community = os.path.basename(query_path).split("..")[0]

  assert len(true_labels) == len(predictions), \
    "Labels and predictions must have the same length."

  results = pd.DataFrame(data={
    "label" : true_labels,
    # Index 1 of the probabilities entry is taken as the prediction.
    "prediction": [pred[probability_key][1] for pred in predictions],
    "community": [community] * len(predictions),
  })
  results.to_csv(
      path_or_buf=FLAGS.tmp_results_path,
      mode='a+',
      index=False,
      header=False)

if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/finetune.sh
================================================
#!/bin/bash
# Fine-tunes the TF Hub classifier on every support episode of the
# many_communities_40_per_8_shot data and uploads the combined predictions,
# once per (learning rate, train steps) combination.
#
# Usage: finetune.sh [test]
#   test       - evaluate the single best configuration on the test episodes.
#   (anything else) - sweep learning rates / train steps on validation episodes.

BASE_PATH="gs://conversationai-models"
GCS_RESOURCES="${BASE_PATH}/resources"

warm_start_from="gs://conversationai-models/tf_trainer_runs/msushkov/tf_hub_classifier_many_communities_40_per_8_shot/20190723_110557/model_dir"

eval_steps=1
eval_period=5

labels="label"
label_dtypes="int"
text_feature="text"

batch_size=24
dropout_rate=0.53291173797826941
dense_units='256,128,64'

if [ "$1" == "test" ]; then
	VALIDATION_OR_TEST="test"

	# Best hparams found on the validation set
	learning_rate_lst=(0.00001238498)
	train_steps_lst=(50)

else
	VALIDATION_OR_TEST="validation"

	# original, original/2, original/5, original/10, original*2
	learning_rate_lst=(6.1924912697697353e-06 0.00000309624 0.00000123849 6.1924912697697353e-07 0.00001238498)
	train_steps_lst=(5 10 50)
fi

train_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/${VALIDATION_OR_TEST}_episodes/support/*.tfrecord"
combined_results_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/$VALIDATION_OR_TEST"

for learning_rate in "${learning_rate_lst[@]}"; do
	echo "Learning rate:"
	echo "$learning_rate"

	for train_steps in "${train_steps_lst[@]}"; do
		echo "Train steps:"
		echo "$train_steps"

		tmp_results_fname="tf_hub_classifier_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv"
		tmp_results_path="/tmp/$tmp_results_fname"

		# The trainer appends to this file, so start from a clean slate.
		# -f keeps a first run (file not there yet) from reporting an error.
		rm -f -- "$tmp_results_path"

		COUNTER=0
		# The wildcard in $train_dir is quoted so that it is expanded by
		# gcloud rather than by the local shell; the listing itself is
		# deliberately left unquoted to iterate over one path per word.
		for train_path in $(gcloud storage ls "$train_dir"); do

			# The query episode sits next to the support episode.
			valid_path=${train_path/${VALIDATION_OR_TEST}_episodes\/support/${VALIDATION_OR_TEST}_episodes\/query}

			# Every episode is fine-tuned from the warm-start checkpoint,
			# not from the previous episode's model.
			rm -rf "tf_hub_classifier_local_model_dir"

			python -m tf_trainer.tf_hub_classifier.finetune \
			    --model_dir="tf_hub_classifier_local_model_dir" \
			    --train_path="$train_path" \
			    --validate_path="$valid_path" \
			    --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
			    --is_embedding_trainable=False \
			    --train_steps="$train_steps" \
			    --eval_period="$eval_period" \
			    --eval_steps="$eval_steps" \
			    --labels="$labels" \
			    --label_dtypes="$label_dtypes" \
			    --preprocess_in_tf=False \
			    --batch_size="$batch_size" \
			    --dense_units="$dense_units" \
			    --dropout_rate="$dropout_rate" \
			    --learning_rate="$learning_rate" \
			    --text_feature="$text_feature" \
			    --warm_start_from="$warm_start_from" \
			    --tmp_results_path="$tmp_results_path"

			# Number of episodes processed so far ($[ ] is deprecated syntax).
			COUNTER=$((COUNTER + 1))
		done

		gcloud storage cp "$tmp_results_path" "$combined_results_dir"

	done
done

================================================
FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: CUSTOM
  masterType: standard
  workerType: standard_gpu
  parameterServerType: large_model
  workerCount: 1
  parameterServerCount: 1
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 40
    maxParallelTrials: 4
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: INTEGER
        minValue: 16
        maxValue: 256
        scaleType: UNIT_LOG_SCALE
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '512,128,64'
        - '128,64,64'
        - '128,64'
        - '512,64'
        - '128,128,128,64'


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_civil_comments.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/toxicity
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '512,128,64'
        - '128,64,64'
        - '128,64'
        - '512,64'
        - '128,128,128,64'


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_many_communities.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/removed
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '512,128,64'
        - '128,64,64'
        - '128,64'
        - '512,64'
        - '128,128,128,64'


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_many_communities_40_per_8_shot.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/label
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 32
        - 64
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '256,128,64'
        - '128,64,64'
        - '128,64'
        - '512,64'


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_toxicity.yaml
================================================
trainingInput:
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 0.7
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '512,128,64'
        - '128,64,64'
        - '128,64'
        - '512,64'
        - '128,128,128,64'


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/model.py
================================================
"""Tensorflow Estimator using TF Hub universal sentence encoder."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import tensorflow_hub as hub
from tf_trainer.common import base_model
from typing import List

FLAGS = tf.app.flags.FLAGS

# Hyperparameters
# TODO: Add validation
tf.app.flags.DEFINE_float('learning_rate', 0.00003,
                          'The learning rate to use during training.')
tf.app.flags.DEFINE_float('dropout_rate', 0.15,
                          'The dropout rate to use during training.')
# Passed as `module_spec` to hub.text_embedding_column in the model function.
tf.app.flags.DEFINE_string(
    'model_spec',
    'https://tfhub.dev/google/universal-sentence-encoder-large/3',
    'The url of the TF Hub sentence encoding module to use.')
# Passed as `trainable` to hub.text_embedding_column in the model function.
tf.app.flags.DEFINE_bool('trainable', True,
                         'What to pass for the TF Hub trainable parameter.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning. The length of the list
# determines the number of layers, and the size of each layer.
tf.app.flags.DEFINE_string(
    'dense_units', '1024,1024,512',
    'Comma delimited string for the number of hidden units in the dense layers.'
)


class TFHubClassifierModel(base_model.BaseModel):
  """Multi-label classifier built on a TF Hub text-embedding module.

  The text feature is embedded with the module given by --model_spec, passed
  through a stack of dense+dropout layers (sized by --dense_units), and
  projected to one logit per target label. Each label gets its own binary
  classification head.
  """

  def __init__(self, target_labels: List[str]) -> None:
    """Stores the label names; one binary head is created per label."""
    self._target_labels = target_labels

  @staticmethod
  def hparams():
    """Builds the HParams object from the command-line flags.

    Returns:
      A tf.contrib.training.HParams with learning_rate, dropout_rate and
      dense_units (the --dense_units string parsed into a list of ints).
    """
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        dropout_rate=FLAGS.dropout_rate,
        dense_units=dense_units)
    return hparams

  def estimator(self, model_dir):
    """Returns a tf.estimator.Estimator that reads/writes under model_dir."""
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model_fn: embedding -> dense stack -> multi-head spec.

    Args:
      features: Feature dict; must hold the text under
        base_model.TEXT_FEATURE_KEY and, because each head is built with
        weight_column='<label>_weight', a weight feature per label.
      labels: Labels passed through to the multi-head.
      mode: A tf.estimator.ModeKeys value.
      params: HParams as produced by hparams().
      config: Unused; part of the Estimator model_fn signature.

    Returns:
      The EstimatorSpec created by the multi-head for the given mode.
    """
    embedded_text_feature_column = hub.text_embedding_column(
        key=base_model.TEXT_FEATURE_KEY,
        module_spec=FLAGS.model_spec,
        trainable=FLAGS.trainable)
    inputs = tf.feature_column.input_layer(features,
                                           [embedded_text_feature_column])

    # tf.layers.dropout defaults to training=False, in which case it returns
    # its input unchanged. Dropout must be switched on explicitly for the
    # training graph and left off for eval/predict.
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    hidden = inputs
    for num_units in params.dense_units:
      hidden = tf.layers.dense(
          inputs=hidden, units=num_units, activation=tf.nn.relu)
      hidden = tf.layers.dropout(
          hidden, rate=params.dropout_rate, training=is_training)
    # Final linear projection: one logit per target label.
    logits = tf.layers.dense(
        inputs=hidden, units=len(self._target_labels), activation=None)

    output_heads = [
        tf.contrib.estimator.binary_classification_head(
            name=name, weight_column=name + '_weight')
        for name in self._target_labels
    ]
    multihead = tf.contrib.estimator.multi_head(output_heads)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return multihead.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/run.deploy.sh
================================================
#!/bin/bash
# Deploys a saved model on Cloud MLE.
#
# Usage: run.deploy.sh <civil_comments|toxicity|many_communities>
#
# Picks the last entry listed under the current user's run directory for the
# chosen dataset, creates the Cloud MLE model, and deploys that entry as a new
# timestamped version.

if [ "$1" == "civil_comments" ] || [ "$1" == "toxicity" ] || [ "$1" == "many_communities" ] ; then

    MODEL_NAME="tf_hub_classifier_$1"

else
    echo "First positional arg must be one of civil_comments, toxicity, many_communities."
    exit 1
fi


# By default, the model is the last one from the user.
# NOTE(review): this takes the last line of the listing as-is; confirm that it
# points at the exported SavedModel directory expected by --origin.
MODEL_RUNS_DIR="gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/"
MODEL_SAVED_PATH=$(gcloud storage ls "${MODEL_RUNS_DIR}" | tail -1)

# Without this guard an empty path would make `--origin` swallow the next flag
# and gcloud would fail with a confusing message.
if [ -z "${MODEL_SAVED_PATH}" ]; then
    echo "No saved model found under ${MODEL_RUNS_DIR}" >&2
    exit 1
fi

# Create a new model.
# Will raise an error if the model already exists; the script deliberately
# carries on so that a new version can be added to an existing model.
gcloud ml-engine models create "${MODEL_NAME}" \
  --regions us-central1

# Deploy a model version.
MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S")
gcloud ml-engine versions create "${MODEL_VERSION}" \
  --model "${MODEL_NAME}" \
  --origin "${MODEL_SAVED_PATH}" \
  --runtime-version 1.10


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/run.hyperparameter.sh
================================================
#!/bin/bash
# Submits a Cloud ML Engine job for the TF Hub classifier using the
# per-dataset config tf_trainer/tf_hub_classifier/hparam_config_<dataset>.yaml.
#
# Usage: run.hyperparameter.sh <dataset>
# <dataset> ($1) is appended to the job name and the output directory, and
# selects the config file.

# Presumably defines MODEL_PARENT_DIR and the dataset variables used below
# (train_path, valid_path, labels, ...) -- verify against dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"

MODEL_NAME="tf_hub_classifier"
MODEL_NAME_DATA="${MODEL_NAME}_$1"
TIMESTAMP=$(date '+%Y%m%d_%H%M%S')
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${TIMESTAMP}"
JOB_NAME="tf_trainer_${MODEL_NAME_DATA}_${USER}_${TIMESTAMP}"

# Everything after the bare `--` is forwarded to the trainer module.
# NOTE(review): --is_embedding_trainable and --preprocess_in_tf are not among
# the flags defined in this directory's model.py (which defines --trainable);
# confirm they are defined elsewhere and are not being silently ignored.
gcloud ml-engine jobs submit training ${JOB_NAME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.12 \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --region=us-east1 \
    --verbosity=debug \
    --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \
    -- \
    --train_path=${train_path} \
    --validate_path=${valid_path} \
    --model_dir="${JOB_DIR}/model_dir" \
    --is_embedding_trainable=False \
    --train_steps=${train_steps} \
    --eval_period=${eval_period} \
    --eval_steps=${eval_steps} \
    --labels=${labels} \
    --label_dtypes=${label_dtypes} \
    --preprocess_in_tf=False \
    --model_spec="gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73" \
    --text_feature=${text_feature}


# Tell the caller where the job writes its model.
echo "Model dir:"
echo ${JOB_DIR}/model_dir


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/run.local.sh
================================================
#!/bin/bash
# Runs the TF Hub classifier trainer on the local machine, writing to
# ./tf_hub_classifier_local_model_dir.

# Presumably sets train_path, valid_path, labels and label_dtypes -- verify
# against dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"

python -m tf_trainer.tf_hub_classifier.run \
  --train_path=${train_path} \
  --validate_path=${valid_path} \
  --labels=${labels} \
  --label_dtypes=${label_dtypes} \
  --model_dir="tf_hub_classifier_local_model_dir" \
  --model_spec="gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73"


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/run.ml_engine.sh
================================================
#!/bin/bash
# This script runs one training job on Cloud MLE.
#
# Usage:
#   run.ml_engine.sh <civil_comments|toxicity|many_communities>
#   run.ml_engine.sh many_communities_40_per_8_shot <optimistic|pessimistic>
#
# The first argument picks a fixed set of hyperparameters, step counts and
# machine config; for many_communities_40_per_8_shot the second argument
# chooses between two hyperparameter sets.

# Presumably defines MODEL_PARENT_DIR and the dataset variables used below
# (train_path, valid_path, labels, ...) -- verify against dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_hub_classifier"
MODEL_NAME_DATA="${MODEL_NAME}_$1"
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"


if [ "$1" == "civil_comments" ]; then
    batch_size=128
    dropout_rate=0.12298246947263007
    learning_rate=0.0001473127671008433
    dense_units='512,128,64'
    train_steps=50000
    eval_period=1000
    eval_steps=2000
    config="tf_trainer/common/p100_config.yaml"

elif [ "$1" == "toxicity" ]; then
    batch_size=32
    dropout_rate=0.38925458520872092
    learning_rate=0.00012916208894260696
    dense_units='512,128,64'
    train_steps=250000
    eval_period=1000
    eval_steps=6000
    config="tf_trainer/common/p100_config.yaml"

elif [ "$1" == "many_communities" ]; then
    batch_size=128
    dropout_rate=0.6987085501984901
    learning_rate=0.00031738926545884962
    dense_units='512,128,64'
    train_steps=700000
    eval_period=4000
    eval_steps=45000
    config="tf_trainer/common/basic_gpu_config.yaml"

elif [ "$1" == "many_communities_40_per_8_shot" ]; then

    train_steps=8000
    eval_steps=250
    eval_period=200
    config="tf_trainer/common/basic_gpu_config.yaml"

    if [ "$2" == "optimistic" ]; then

        batch_size=32
        dropout_rate=0.69999979814967772
        learning_rate=7.2549254796945835e-06
        dense_units='512,64'

    elif [ "$2" == "pessimistic" ]; then

        batch_size=32
        dropout_rate=0.53291173797826941
        learning_rate=6.1924912697697353e-06
        dense_units='256,128,64'

    else
        echo "Must provide second positional argument."
        exit 1
    fi

else
    echo "First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot."
    # Use exit, not return: `return` is only valid inside a function or a
    # sourced script. When this file is executed, bash would print an error
    # and fall through to submit a job with every hyperparameter unset.
    exit 1
fi


# Everything after the bare `--` is forwarded to the trainer module.
gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.10 \
    --config $config \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --region=us-east1 \
    --verbosity=debug \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --model_dir="${JOB_DIR}/model_dir" \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --batch_size=$batch_size \
    --dropout_rate=$dropout_rate \
    --learning_rate=$learning_rate \
    --dense_units=$dense_units \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --model_spec="gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73" \
    --text_feature=$text_feature


================================================
FILE: experiments/tf_trainer/tf_hub_classifier/run.py
================================================
"""Experiments with Toxicity Dataset"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import tfrecord_input
from tf_trainer.tf_hub_classifier import model as tf_hub_classifier

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS


def main(argv):
  """Builds the dataset and model, runs train_with_eval, then exports.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # unused

  tfrecord_dataset = tfrecord_input.TFRecordInput()
  classifier = tf_hub_classifier.TFHubClassifierModel(
      tfrecord_dataset.labels())

  model_runner = model_trainer.ModelTrainer(tfrecord_dataset, classifier)
  model_runner.train_with_eval()

  export_input_fn = serving_input.create_text_serving_input_fn(
      text_feature_name=base_model.TEXT_FEATURE_KEY,
      example_key_name=base_model.EXAMPLE_KEY)
  # The export is keyed on the AUC metric of the first label in --labels.
  first_label = FLAGS.labels.split(',')[0]
  model_runner.export(
      export_input_fn,
      base_model.EXAMPLE_KEY,
      metrics_key="auc/%s" % first_label)


if __name__ == "__main__":
  # Script entry point: log at INFO level, then hand control to tf.app.run,
  # which invokes main.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_hub_tfjs/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/tf_hub_tfjs/model.py
================================================
"""Tensorflow Estimator using TF Hub universal sentence encoder."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import tensorflow_hub as hub
from tf_trainer.common import base_model
from typing import List

FLAGS = tf.app.flags.FLAGS

# Hyperparameters exposed as command-line flags.
# TODO: Add validation
tf.app.flags.DEFINE_float(
    name='learning_rate',
    default=0.00005,
    help='The learning rate to use during training.')
tf.app.flags.DEFINE_float(
    name='dropout_rate',
    default=0.38925,
    help='The dropout rate to use during training.')
tf.app.flags.DEFINE_string(
    name='model_spec',
    default='https://tfhub.dev/google/universal-sentence-encoder-lite/2',
    help='The url of the TF Hub sentence encoding module to use.')
tf.app.flags.DEFINE_bool(
    name='trainable',
    default=True,
    help='What to pass for the TF Hub trainable parameter.')
# A multi_integer flag would be the natural fit here, but ML Engine
# hyperparameter tuning constrains us to a string. The number of
# comma-separated entries is the number of dense layers, and each entry is
# the width of the corresponding layer.
tf.app.flags.DEFINE_string(
    name='dense_units',
    default='512,128,64',
    help=('Comma delimited string for the number of hidden units in the '
          'dense layers.'))


class TFHubClassifierModel(base_model.BaseModel):
  """Multi-label classifier on a TF Hub module fed with sparse token ids.

  The module given by --model_spec is called with the 'values', 'indices'
  and 'dense_shape' features; its output goes through a stack of
  dense+dropout layers (sized by --dense_units) and is projected to one
  logit per target label. Each label gets its own binary classification
  head.
  """

  def __init__(self, target_labels: List[str]) -> None:
    """Stores the label names; one binary head is created per label."""
    self._target_labels = target_labels

  @staticmethod
  def hparams():
    """Builds the HParams object from the command-line flags.

    Returns:
      A tf.contrib.training.HParams with learning_rate, dropout_rate and
      dense_units (the --dense_units string parsed into a list of ints).
    """
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        dropout_rate=FLAGS.dropout_rate,
        dense_units=dense_units)
    return hparams

  def estimator(self, model_dir):
    """Returns a tf.estimator.Estimator that reads/writes under model_dir."""
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model_fn: hub module -> dense stack -> multi-head spec.

    Args:
      features: Feature dict; must hold 'values', 'indices' and
        'dense_shape' for the hub module and, because each head is built
        with weight_column='<label>_weight', a weight feature per label.
      labels: Labels passed through to the multi-head.
      mode: A tf.estimator.ModeKeys value.
      params: HParams as produced by hparams().
      config: Unused; part of the Estimator model_fn signature.

    Returns:
      The EstimatorSpec created by the multi-head for the given mode.
    """
    # Honour --trainable (default True) instead of hard-coding True, matching
    # the flag's documented purpose.
    module = hub.Module(FLAGS.model_spec, trainable=FLAGS.trainable)
    hidden = module(
        inputs=dict(
            values=features['values'],
            indices=features['indices'],
            dense_shape=features['dense_shape']))

    # tf.layers.dropout defaults to training=False, in which case it returns
    # its input unchanged. Dropout must be switched on explicitly for the
    # training graph and left off for eval/predict.
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    for num_units in params.dense_units:
      hidden = tf.layers.dense(
          inputs=hidden, units=num_units, activation=tf.nn.relu)
      hidden = tf.layers.dropout(
          hidden, rate=params.dropout_rate, training=is_training)
    # Final linear projection: one logit per target label.
    logits = tf.layers.dense(
        inputs=hidden, units=len(self._target_labels), activation=None)

    output_heads = [
        tf.contrib.estimator.binary_classification_head(
            name=name, weight_column=name + '_weight')
        for name in self._target_labels
    ]
    multihead = tf.contrib.estimator.multi_head(output_heads)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return multihead.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)


================================================
FILE: experiments/tf_trainer/tf_hub_tfjs/notebook/BiasEvaluation.ipynb
================================================
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "BiasEvaluation.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "metadata": {
        "id": "9B7PdsrvW__k",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "# Bias Evaluation for TF Javascript Model\n",
        "\n",
        "Based on the [FAT* Tutorial Measuring Unintended Bias in Text Classification Models with Real Data](https://github.com/conversationai/unintended-ml-bias-analysis/blob/master/presentations/FAT_star_tutorial.md).\n",
        "\n",
        "Copyright 2019 Google LLC.\n",
        "SPDX-License-Identifier: Apache-2.0"
      ]
    },
    {
      "metadata": {
        "id": "0Jsjp3E5rbuC",
        "colab_type": "code",
        "outputId": "e6aeceef-b28b-4c9d-aec9-c870def2219f",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        }
      },
      "cell_type": "code",
      "source": [
        "!pip3 install --quiet \"tensorflow>=1.11\"\n",
        "!pip3 install --quiet sentencepiece"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\u001b[?25l\r\u001b[K    0% |▎                               | 10kB 16.3MB/s eta 0:00:01\r\u001b[K    1% |▋                               | 20kB 2.2MB/s eta 0:00:01\r\u001b[K    2% |█                               | 30kB 3.3MB/s eta 0:00:01\r\u001b[K    3% |█▎                              | 40kB 2.2MB/s eta 0:00:01\r\u001b[K    4% |█▋                              | 51kB 2.7MB/s eta 0:00:01\r\u001b[K    5% |██                              | 61kB 3.2MB/s eta 0:00:01\r\u001b[K    6% |██▏                             | 71kB 3.7MB/s eta 0:00:01\r\u001b[K    7% |██▌                             | 81kB 4.1MB/s eta 0:00:01\r\u001b[K    8% |██▉                             | 92kB 4.6MB/s eta 0:00:01\r\u001b[K    9% |███▏                            | 102kB 3.5MB/s eta 0:00:01\r\u001b[K    10% |███▌                            | 112kB 3.6MB/s eta 0:00:01\r\u001b[K    11% |███▉                            | 122kB 5.0MB/s eta 0:00:01\r\u001b[K    12% |████                            | 133kB 5.0MB/s eta 0:00:01\r\u001b[K    13% |████▍                           | 143kB 9.3MB/s eta 0:00:01\r\u001b[K    14% |████▊                           | 153kB 9.5MB/s eta 0:00:01\r\u001b[K    15% |█████                           | 163kB 9.5MB/s eta 0:00:01\r\u001b[K    16% |█████▍                          | 174kB 9.3MB/s eta 0:00:01\r\u001b[K    17% |█████▊                          | 184kB 9.4MB/s eta 0:00:01\r\u001b[K    18% |██████                          | 194kB 9.4MB/s eta 0:00:01\r\u001b[K    19% |██████▎                         | 204kB 40.6MB/s eta 0:00:01\r\u001b[K    20% |██████▋                         | 215kB 10.4MB/s eta 0:00:01\r\u001b[K    21% |███████                         | 225kB 10.5MB/s eta 0:00:01\r\u001b[K    22% |███████▎                        | 235kB 10.2MB/s eta 0:00:01\r\u001b[K    23% |███████▋                        | 245kB 10.1MB/s eta 0:00:01\r\u001b[K    24% |███████▉                        | 256kB 10.1MB/s eta 0:00:01\r\u001b[K    25% |████████▏          
             | 266kB 9.9MB/s eta 0:00:01\r\u001b[K    26% |████████▌                       | 276kB 10.2MB/s eta 0:00:01\r\u001b[K    27% |████████▉                       | 286kB 10.2MB/s eta 0:00:01\r\u001b[K    28% |█████████▏                      | 296kB 10.2MB/s eta 0:00:01\r\u001b[K    29% |█████████▌                      | 307kB 10.4MB/s eta 0:00:01\r\u001b[K    30% |█████████▊                      | 317kB 42.5MB/s eta 0:00:01\r\u001b[K    31% |██████████                      | 327kB 42.5MB/s eta 0:00:01\r\u001b[K    32% |██████████▍                     | 337kB 49.1MB/s eta 0:00:01\r\u001b[K    33% |██████████▊                     | 348kB 45.7MB/s eta 0:00:01\r\u001b[K    34% |███████████                     | 358kB 44.8MB/s eta 0:00:01\r\u001b[K    35% |███████████▍                    | 368kB 49.3MB/s eta 0:00:01\r\u001b[K    36% |███████████▋                    | 378kB 47.6MB/s eta 0:00:01\r\u001b[K    37% |████████████                    | 389kB 47.8MB/s eta 0:00:01\r\u001b[K    38% |████████████▎                   | 399kB 12.3MB/s eta 0:00:01\r\u001b[K    39% |████████████▋                   | 409kB 12.3MB/s eta 0:00:01\r\u001b[K    40% |█████████████                   | 419kB 12.3MB/s eta 0:00:01\r\u001b[K    41% |█████████████▎                  | 430kB 12.2MB/s eta 0:00:01\r\u001b[K    42% |█████████████▌                  | 440kB 12.1MB/s eta 0:00:01\r\u001b[K    43% |█████████████▉                  | 450kB 12.2MB/s eta 0:00:01\r\u001b[K    44% |██████████████▏                 | 460kB 12.2MB/s eta 0:00:01\r\u001b[K    45% |██████████████▌                 | 471kB 12.2MB/s eta 0:00:01\r\u001b[K    46% |██████████████▉                 | 481kB 12.3MB/s eta 0:00:01\r\u001b[K    47% |███████████████▏                | 491kB 12.2MB/s eta 0:00:01\r\u001b[K    48% |███████████████▍                | 501kB 47.1MB/s eta 0:00:01\r\u001b[K    49% |███████████████▊                | 512kB 44.5MB/s eta 0:00:01\r\u001b[K    50% |████████████████                | 522kB 
45.4MB/s eta 0:00:01\r\u001b[K    51% |████████████████▍               | 532kB 47.8MB/s eta 0:00:01\r\u001b[K    52% |████████████████▊               | 542kB 49.3MB/s eta 0:00:01\r\u001b[K    53% |█████████████████               | 552kB 53.0MB/s eta 0:00:01\r\u001b[K    54% |█████████████████▎              | 563kB 53.3MB/s eta 0:00:01\r\u001b[K    55% |█████████████████▋              | 573kB 51.8MB/s eta 0:00:01\r\u001b[K    56% |██████████████████              | 583kB 51.9MB/s eta 0:00:01\r\u001b[K    57% |██████████████████▎             | 593kB 53.4MB/s eta 0:00:01\r\u001b[K    58% |██████████████████▋             | 604kB 53.2MB/s eta 0:00:01\r\u001b[K    59% |███████████████████             | 614kB 57.7MB/s eta 0:00:01\r\u001b[K    60% |███████████████████▏            | 624kB 55.6MB/s eta 0:00:01\r\u001b[K    61% |███████████████████▌            | 634kB 54.8MB/s eta 0:00:01\r\u001b[K    62% |███████████████████▉            | 645kB 53.2MB/s eta 0:00:01\r\u001b[K    63% |████████████████████▏           | 655kB 52.5MB/s eta 0:00:01\r\u001b[K    64% |████████████████████▌           | 665kB 44.3MB/s eta 0:00:01\r\u001b[K    64% |████████████████████▉           | 675kB 45.4MB/s eta 0:00:01\r\u001b[K    65% |█████████████████████▏          | 686kB 45.2MB/s eta 0:00:01\r\u001b[K    66% |█████████████████████▍          | 696kB 45.8MB/s eta 0:00:01\r\u001b[K    67% |█████████████████████▊          | 706kB 45.4MB/s eta 0:00:01\r\u001b[K    68% |██████████████████████          | 716kB 45.6MB/s eta 0:00:01\r\u001b[K    69% |██████████████████████▍         | 727kB 45.7MB/s eta 0:00:01\r\u001b[K    70% |██████████████████████▊         | 737kB 45.4MB/s eta 0:00:01\r\u001b[K    71% |███████████████████████         | 747kB 47.7MB/s eta 0:00:01\r\u001b[K    72% |███████████████████████▎        | 757kB 47.9MB/s eta 0:00:01\r\u001b[K    73% |███████████████████████▋        | 768kB 58.4MB/s eta 0:00:01\r\u001b[K    74% |████████████████████████        | 778kB 55.5MB/s eta 
0:00:01\r\u001b[K    75% |████████████████████████▎       | 788kB 55.7MB/s eta 0:00:01\r\u001b[K    76% |████████████████████████▋       | 798kB 53.9MB/s eta 0:00:01\r\u001b[K    77% |█████████████████████████       | 808kB 54.4MB/s eta 0:00:01\r\u001b[K    78% |█████████████████████████▏      | 819kB 28.4MB/s eta 0:00:01\r\u001b[K    79% |█████████████████████████▌      | 829kB 28.7MB/s eta 0:00:01\r\u001b[K    80% |█████████████████████████▉      | 839kB 28.7MB/s eta 0:00:01\r\u001b[K    81% |██████████████████████████▏     | 849kB 28.3MB/s eta 0:00:01\r\u001b[K    82% |██████████████████████████▌     | 860kB 26.5MB/s eta 0:00:01\r\u001b[K    83% |██████████████████████████▉     | 870kB 26.4MB/s eta 0:00:01\r\u001b[K    84% |███████████████████████████     | 880kB 26.9MB/s eta 0:00:01\r\u001b[K    85% |███████████████████████████▍    | 890kB 27.1MB/s eta 0:00:01\r\u001b[K    86% |███████████████████████████▊    | 901kB 27.6MB/s eta 0:00:01\r\u001b[K    87% |████████████████████████████    | 911kB 27.2MB/s eta 0:00:01\r\u001b[K    88% |████████████████████████████▍   | 921kB 49.5MB/s eta 0:00:01\r\u001b[K    89% |████████████████████████████▊   | 931kB 48.4MB/s eta 0:00:01\r\u001b[K    90% |█████████████████████████████   | 942kB 48.4MB/s eta 0:00:01\r\u001b[K    91% |█████████████████████████████▎  | 952kB 48.8MB/s eta 0:00:01\r\u001b[K    92% |█████████████████████████████▋  | 962kB 53.0MB/s eta 0:00:01\r\u001b[K    93% |██████████████████████████████  | 972kB 53.4MB/s eta 0:00:01\r\u001b[K    94% |██████████████████████████████▎ | 983kB 53.5MB/s eta 0:00:01\r\u001b[K    95% |██████████████████████████████▋ | 993kB 52.7MB/s eta 0:00:01\r\u001b[K    96% |██████████████████████████████▉ | 1.0MB 53.2MB/s eta 0:00:01\r\u001b[K    97% |███████████████████████████████▏| 1.0MB 54.1MB/s eta 0:00:01\r\u001b[K    98% |███████████████████████████████▌| 1.0MB 53.1MB/s eta 0:00:01\r\u001b[K    99% |███████████████████████████████▉| 1.0MB 54.6MB/s eta 0:00:01\r\u001b[K    
100% |████████████████████████████████| 1.0MB 17.6MB/s \n",
            "\u001b[?25h"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "4bSQf93oVo7j",
        "colab_type": "code",
        "outputId": "191c3e9f-d902-4071-e115-720d8d2ed1a5",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 53
        }
      },
      "cell_type": "code",
      "source": [
        "from __future__ import absolute_import\n",
        "from __future__ import division\n",
        "from __future__ import print_function\n",
        "\n",
        "import re\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "import tensorflow as tf\n",
        "import sentencepiece\n",
        "from google.colab import auth\n",
        "from IPython.display import HTML, display\n",
        "\n",
        "from sklearn import metrics\n",
        "\n",
        "%matplotlib inline\n",
        "\n",
        "# autoreload makes it easier to interactively work on code in imported libraries\n",
        "%load_ext autoreload\n",
        "%autoreload 2\n",
        "\n",
        "# Set pandas display options so we can read more of the comment text.\n",
        "pd.set_option('max_colwidth', 300)\n",
        "\n",
        "# Seed for Pandas sampling, to get consistent sampling results\n",
        "RANDOM_STATE = 123456789"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "The autoreload extension is already loaded. To reload it, use:\n",
            "  %reload_ext autoreload\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "12LU1AjWr-da",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "auth.authenticate_user()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "FFFXbLiRrvtz",
        "colab_type": "code",
        "outputId": "69de8876-a0d8-4e31-816c-a3c135854faa",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 125
        }
      },
      "cell_type": "code",
      "source": [
        "!mkdir -p tfjs_model\n",
        "!gcloud storage cp --recursive gs://conversationai-public/public_models/tfjs/v1/* tfjs_model"      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Copying gs://conversationai-public/public_models/tfjs/v1/saved_model.pb...\n",
            "Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.data-00000-of-00001...\n",
            "/ [0 files][    0.0 B/  3.9 MiB]                                                \rCopying gs://conversationai-public/public_models/tfjs/v1/variables/variables.index...\n",
            "Copying gs://conversationai-public/public_models/tfjs/v1/assets/universal_encoder_8k_spm.model...\n",
            "- [4/4 files][ 32.3 MiB/ 32.3 MiB] 100% Done                                    \n",
            "Operation completed over 4 objects/32.3 MiB.                                     \n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "0bmiyJR60gDP",
        "colab_type": "code",
        "outputId": "ded1805f-f50c-4846-cafb-dfb51d79fa4d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        }
      },
      "cell_type": "code",
      "source": [
        "test_df = pd.read_csv(\n",
        "    'https://raw.githubusercontent.com/conversationai/unintended-ml-bias-analysis/master/unintended_ml_bias/new_madlibber/output_data/English/intersectional_madlibs.csv')\n",
        "print('test data has %d rows' % len(test_df))\n"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "test data has 30240 rows\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "lbF4Fy-yjnaH",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "madlibs_words = pd.read_csv(\n",
        "    'https://raw.githubusercontent.com/conversationai/unintended-ml-bias-analysis/master/unintended_ml_bias/new_madlibber/input_data/English/words.csv')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "rwx0ucIXj4Ba",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "identity_columns = madlibs_words[madlibs_words.type=='identity'].word.tolist()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "mzY7oTzQlHq5",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "for term in identity_columns:\n",
        "  test_df[term] = test_df['phrase'].apply(\n",
        "      lambda x: bool(re.search(r'\\b{}\\b'.format(term), x,\n",
        "                               flags=re.UNICODE|re.IGNORECASE)))\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "6dP7ANLcl1NC",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "_8RfGq2lX2EY",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "## Score test set with our text classification model\n",
        "\n",
        "Using our new model, we can score the set of test comments for toxicity.\n"
      ]
    },
    {
      "metadata": {
        "id": "AfC_yo0Tt5SQ",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "TOXICITY_COLUMN = 'toxicity'\n",
        "TEXT_COLUMN = 'phrase'"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "E0KT0565tUDp",
        "colab_type": "code",
        "outputId": "7bbd2622-ea7e-43dd-a6b9-86d1e033508a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 289
        }
      },
      "cell_type": "code",
      "source": [
        "predict_fn = tf.contrib.predictor.from_saved_model(\n",
        "  'tfjs_model', signature_def_key='predict')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\n",
            "WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
            "For more information, please see:\n",
            "  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
            "  * https://github.com/tensorflow/addons\n",
            "If you depend on functionality not listed there, please file an issue.\n",
            "\n",
            "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\n",
            "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "Use standard file APIs to check for files with this prefix.\n",
            "INFO:tensorflow:Restoring parameters from tfjs_model/variables/variables\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "ZppO68XctZPH",
        "colab_type": "code",
        "outputId": "630cbb60-9f58-4d28-a5da-45b4091f6715",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        }
      },
      "cell_type": "code",
      "source": [
        "sp = sentencepiece.SentencePieceProcessor()\n",
        "sp.Load('tfjs_model/assets/universal_encoder_8k_spm.model')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 17
        }
      ]
    },
    {
      "metadata": {
        "id": "Q3heBWS5tdg9",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "def progress(value, max=100):\n",
        "    return HTML(\"\"\"\n",
        "        <progress\n",
        "            value='{value}'\n",
        "            max='{max}',\n",
        "            style='width: 100%'\n",
        "        >\n",
        "            {value}\n",
        "        </progress>\n",
        "    \"\"\".format(value=value, max=max))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "KSG_Dc7Gti-w",
        "colab_type": "code",
        "outputId": "86ab2dd0-cd14-48f4-f42a-7a7216de26ec",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "cell_type": "code",
      "source": [
        "tox_scores = []\n",
        "nrows = test_df.shape[0]\n",
        "out = display(progress(0, nrows), display_id=True)\n",
        "for offset in range(0, nrows):\n",
        "  out.update(progress(offset, nrows))\n",
        "  values = sp.EncodeAsIds(test_df[TEXT_COLUMN][offset])\n",
        "  tox_scores.append(predict_fn({\n",
        "      'values': values,\n",
        "      'indices': [(0, i) for i in range(len(values))],\n",
        "      'dense_shape': [1, len(values)]})['toxicity/probabilities'][0,1])"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "\n",
              "        <progress\n",
              "            value='30239'\n",
              "            max='30240',\n",
              "            style='width: 100%'\n",
              "        >\n",
              "            30239\n",
              "        </progress>\n",
              "    "
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {
            "tags": []
          }
        }
      ]
    },
    {
      "metadata": {
        "id": "V0YjAtUBWa1p",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "MODEL_NAME = 'tfjs_model'\n",
        "test_df[MODEL_NAME] = tox_scores"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "P31u4dyyaKKE",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "# Evaluate the overall ROC-AUC\n",
        "\n",
        "This calculates the models performance on the entire test set using the ROC-AUC metric."
      ]
    },
    {
      "metadata": {
        "id": "o6IClt8eplMn",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "SUBGROUP_AUC = 'subgroup_auc'\n",
        "BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC = 'background_positive_subgroup_negative_auc'\n",
        "BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC = 'background_negative_subgroup_positive_auc'\n",
        "\n",
        "def compute_auc(y_true, y_pred):\n",
        "  try:\n",
        "    return metrics.roc_auc_score(y_true, y_pred)\n",
        "  except ValueError:\n",
        "    return np.nan\n",
        "\n",
        "\n",
        "def compute_subgroup_auc(df, subgroup, label, model_name):\n",
        "  subgroup_examples = df[df[subgroup]]\n",
        "  return compute_auc(subgroup_examples[label], subgroup_examples[model_name])\n",
        "\n",
        "\n",
        "def compute_background_positive_subgroup_negative_auc(df, subgroup, label, model_name):\n",
        "  \"\"\"Computes the AUC of the within-subgroup negative examples and the background positive examples.\"\"\"\n",
        "  index = df[label] == 'toxic'\n",
        "  subgroup_negative_examples = df[df[subgroup] & ~index]\n",
        "  non_subgroup_positive_examples = df[~df[subgroup] & index]\n",
        "  examples = subgroup_negative_examples.append(non_subgroup_positive_examples)\n",
        "  return compute_auc(examples[label], examples[model_name])\n",
        "\n",
        "\n",
        "def compute_background_negative_subgroup_positive_auc(df, subgroup, label, model_name):\n",
        "  \"\"\"Computes the AUC of the within-subgroup positive examples and the background negative examples.\"\"\"\n",
        "  index = df[label] == 'toxic'\n",
        "  subgroup_positive_examples = df[df[subgroup] & index]\n",
        "  non_subgroup_negative_examples = df[~df[subgroup] & ~index]\n",
        "  examples = subgroup_positive_examples.append(non_subgroup_negative_examples)\n",
        "  return compute_auc(examples[label], examples[model_name])\n",
        "\n",
        "\n",
        "def compute_bias_metrics_for_model(dataset,\n",
        "                                   subgroups,\n",
        "                                   model,\n",
        "                                   label_col,\n",
        "                                   include_asegs=False):\n",
        "  \"\"\"Computes per-subgroup metrics for all subgroups and one model.\"\"\"\n",
        "  records = []\n",
        "  for subgroup in subgroups:\n",
        "    record = {\n",
        "        'subgroup': subgroup,\n",
        "        'subgroup_size': len(dataset[dataset[subgroup]])\n",
        "    }\n",
        "    record[SUBGROUP_AUC] = compute_subgroup_auc(\n",
        "        dataset, subgroup, label_col, model)\n",
        "    record[BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC] = compute_background_positive_subgroup_negative_auc(\n",
        "        dataset, subgroup, label_col, model)\n",
        "    record[BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC] = compute_background_negative_subgroup_positive_auc(\n",
        "        dataset, subgroup, label_col, model)\n",
        "    records.append(record)\n",
        "  return pd.DataFrame(records).sort_values('subgroup_auc', ascending=True)\n",
        "\n",
        "bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns, MODEL_NAME, TOXICITY_COLUMN)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "GS9t687KogDQ",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "# Plot a heatmap of bias metrics"
      ]
    },
    {
      "metadata": {
        "id": "B5OxkxMqNvaB",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "Plot a heatmap of the bias metrics.  Higher scores indicate better results.\n",
        "* Subgroup AUC measures the ability to separate toxic and non-toxic comments for this identity.\n",
        "* Negative cross AUC measures the ability to separate non-toxic comments for this identity from toxic comments from the background distribution.\n",
        "* Positive cross AUC measures the ability to separate toxic comments for this identity from non-toxic comments from the background distribution."
      ]
    },
    {
      "metadata": {
        "id": "AGb1CQn2PZVX",
        "colab_type": "code",
        "outputId": "15595027-3db8-4526-a4ea-596691143f93",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1645
        }
      },
      "cell_type": "code",
      "source": [
        "def plot_auc_heatmap(bias_metrics_results, models):\n",
        "  metrics_list = [SUBGROUP_AUC, BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC, BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC]\n",
        "  df = bias_metrics_results.set_index('subgroup')\n",
        "  columns = []\n",
        "  vlines = [i * len(models) for i in range(len(metrics_list))]\n",
        "  for metric in metrics_list:\n",
        "    for model in models:\n",
        "      columns.append(metric)\n",
        "  num_rows = len(df)\n",
        "  num_columns = len(columns)\n",
        "  fig = plt.figure(figsize=(num_columns, 0.5 * num_rows))\n",
        "  ax = sns.heatmap(df[columns], annot=True, fmt='.2', cbar=True, cmap='Reds_r',\n",
        "                   vmin=0.5, vmax=1.0)\n",
        "  ax.xaxis.tick_top()\n",
        "  plt.xticks(rotation=90)\n",
        "  ax.vlines(vlines, *ax.get_ylim())\n",
        "  return ax\n",
        "\n",
        "plot_auc_heatmap(bias_metrics_df, [MODEL_NAME])"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7f4f00b24ba8>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 97
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAASkAAAZKCAYAAACEXpf4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xd8FWX2x/HPTSiSAiFA6EVAjnRE\nQAFZitjXsq4oq1gWxbWLXekoCoIookixK7iyq7jq7k9FRRREQKQXj0qTKklICCGEkuT3x1ySCwQS\nJLkzcz3v1yuv3Jm5IV8u5OSZ5848J5CXl4cxxnhVlNsBjDHmeKxIGWM8zYqUMcbTrEgZYzzNipQx\nxtOsSBljPM2KlDHG06xIGWM8zYqUMcbTrEgZzxGRq0Xkg5DtmSJylZuZiuLHzH5hRcp40X3ATSHb\nlwH3uxOl2PyY2ResSBkvigb2hmxHAQGXshSXHzP7Qhm3AxhTiBeAlSKyBueHvwkw1N1IRfJjZl8I\n2CoIxotEJA5oChwEVFWzXI5UJD9m9gMrUsZzROQr4Kj/mKraw4U4xeLHzH5hp3vGi+4KeVwWOAeo\n5FKW4vJjZl+wkZTxBRH5TFUvcDvHifBjZi+ykZTxHBG544hdtYIfnuXHzH5hRcp4UbWQx3lACnCx\nS1mKy4+ZfcFO94wnBd8pSwxulgcmqOr5LkYqkh8z+4FdzGk8R0QGA8uBFcD/gEXAUldDFcGPmf3C\nipTxootVtSGwWFVbAt2BHJczFcWPmX3BipTxojwRCQBlRKSCqi7GeUvfy/yY2Rds4tx40XtAf2Aa\nsExEfgP2uBupSH7M7As2cW48TUTqAVWBpaqaKyKXq+qHbuc6Hj9m9jIbSRlPU9VfgV9Ddt0LePoH\n3o+ZvczmpIzf+HH5Ez9m9gwrUsZv/Dg/4cfMnmFFyhjjaVakjN/48dTJj5k9w4qU8SQRKS8iDQo5\n9Gy4sxSXHzP7gRUp4zki0hv4AfhvcHu8iNwAoKofu5ntWPyY2S+sSBkvuhNoCyQHtx8GjlwKxWv8\nmNkXrEgZL8pR1f0UvCu2z80wxeTHzL5gRcp40VwReRuoIyKPAHOAz13OVBQ/ZvYFuy3GeE7wRt3O\nQCdgP7BAVb9zN9Xx+TGzX9htMcaL1gGf4dy0O0tVc13OUxx+zOwLdrpnvKgpzsJxvYEVIvKKiHi9\noYEfM/uCne4ZTxORJsBA4BpVPcXtPMXhx8xeZqd7xnNE5BzgMuACYAvwH+AhV0MVwY+Z/cKKlPGi\n+4H3gSdVdZfbYYrJj5l9wU73jGccWhxORO6k8JblL7kQ67j8mNlvbCRlvCQh+LlqIce8+tvUj5l9\nxYqU8QxVfTP4MEdVR4QeE5GxLkQqkh8z+42d7hnPEJErgb8BfwK+DjlUFjhDVRu4ket4/JjZb6xI\nGU8JLnXyIjAmZHcusFpVU10JVYTjZF6jqimuhIogdjGn8RRV3aCqfwZScOZ08nBals92M9exiMhZ\nqroBmAjEhnzEAx1cjBYxbE7KeI6ITMK5gvt0YCHQDnja1VDH1g1YAFxVyLE84P/CmiYC2UjKeFFz\nVe2Kc7p0Kc6IpJnLmQqlqoeKZz/gUVX9OzAKp4WVrSdVAqxIGS8qIyIVAUSkmqpuAlq7nKkoU4GO\nwfmpfwPNgTeP+xWmWKxIGS96Abgm+HmFiGwCVrsbqUjVVfU/ODcYv6CqTwKJLmeKCPbunvE0ESkL\nxKvqTrezHI+ILMC5NWYyzjzVQeALVW3nZq5IYBPnxnNEZF0h+3KAtcAAVV0c/lRFGoyzrvlIVU0R\nkUHAeJczRQQbSRnPEZHHgHTgI5x3yC4GqgFfAc+o6jkuxjsmEWkItMK5RmpJcC7NnCSbkzJedJGq\nTlTVLaq6VVVfAc5T1fluBzsW
EXkY+BfQHaeofigit7ubKjLY6Z7xomwReQ74FmdU0g4oJyLnAZmu\nJju2y4GzVDUHQETK4NwmM9HVVBHAipTxoquAG3BGJQGcuajLca7kvsbFXMcTwCmoh+RiqyCUCCtS\nxnNUNUNEFgMpqvquiNQM3rfnyXv3gqYDP4jIdzjTKGcDU9yNFBls4tx4joiMAeoBjVX1TBEZBiSq\n6j3uJju+4IWcbXBGUEtVdaO7iSKDTZwbL2qnqtcAGQCqOgw4w9VERRCR1sCzwJPBj9Ei0tzdVJHB\nTveMF5UNXsSZByAiVQGvd115HRgCfIczP9UJ51YZTxdXP7AiZbxoLDAfqCcin+CsiHCfu5GKlKqq\n/w3Z/khE+rmWJoLYnJTxJBGJxblJdx/wk6rudTnScYnIC0A08AXONEoXnNHfhwCqaku2/E42kjKe\nE+z8+w+gEs6pEyKCqvZwNdjxxQU/X3rE/l7YulInxYqU8aJxQH9gs9tBiiu4jlShRMQu6DwJVqSM\nF61T1c/cDlGCxO0AfmZFyniRisi/gLk4S544O/3baDPgdgA/syJlvGhX8KOy20FKiL07dRKsSBnP\nUdXhxzomIh+o6l/Cmce4y644N36TUPRTPMdO906CjaSM33jy1ElE6uJccNoEJ+NqYJyqbgPOdzOb\n31mRMqZkTAfeAabhjJw6Au8DnVT1gJvB/M6KlDElI1tVXwzZXiQiF7uWJoJYkTJ+k+Z2gGNYFFxC\nOPS2mB9FpBmAqnq9JZdn2b17xnOCy54M5fD5ncdVdZWrwY5DRL46zuE8j9/S42k2kjJe5LtlT1S1\nu9sZIpUVKeNFvlv2RESSKXjnsSwQD6xX1dPcSxUZ7HTPeE4kLHsiIq2APqr6sNtZ/M4u5jReFAdU\nwFn25BKgIlAOZ9mTq1zMVWyquhznNNWcJDvdM140tLCdqvpruIMUl4j8m8MvNK0F7HEpTkSxImW8\n6H0KfuDLAQ2BxUA3twIVQ+g1Unk4TSSWuZQloliRMp6jqu1Dt0WkBvCES3GKaxnOQn1tcBqDLgJ+\nwbsdl33D5qSM56nqdqC12zmK8CbO6OlxYDSQg3MphTlJNpIyniMi31NwuhcAqgOfu5eoWOJV9dmQ\n7fki8oVraSKIFSnjRaHv4OUBGaqa7laYYooWkXaqughARM7CzlRKhBUp41XDCZnfEZGhwWVPvOpO\n4PngvXp5wMrgPnOSrEgZL3oVmAjcj/PuXrfgPi+vKtBeVc91O0QksiJlvChaVd8P2X7X67fFAOeL\nyHeq+qPbQSKNFSnjRftFpBcwG2fivAdOJ2MvawesFJE9wP7gvjxVTXIxU0Swe/eM54hIbZy38tvh\nzEl9D3h9TsqUEhtJGS+6QVVvdjvEiRCRWYXszgHWAqNUdUN4E0UOK1LGi5JE5DycEdShUydUNcu9\nSEWaC5QHPsJ5d++i4P5VOBd12npTv5MVKeNFlwBXHLEvD+cePq/qcsTCd/NEZKaqDhaRO1xLFQGs\nSBnPUdUmbmf4HcqLyL3AtzjzaO2BqiLSEeu7d1KsSBnPEZF1hew+NL8zQFUXhzlScfTC6bs3DKco\nrQWuxrnO61r3YvmfFSnjRS8D6RTM71wMVAO+AsYD57gX7Zi241wysY2C5hFrVdXePj9JVqSMF12k\nqn8K2X5FRGap6kgRcS1UEf6FM9r7PrjdD/g7cI1riSKEFSnjRdki8hyHz++UC77j59X1mWqoaufQ\nHSLytVthIondpW286CqcOZ1uwLlAMnA5oHh3ZLJQRPIX6xORMygYVZmTYCMp42V5OCOpLGCfqqa6\nnOcoIa2sAsC9IpId3K4AbAEedDFeRLAiZbzoA+AH4Jvg9tnADOB81xIdg6pWcztDpLMiZbyo7BH9\n6v4tIp5emTPYZv2od/KsvfrJsyJlPENEYoIP54SsgpCH0xzU65PQd4U8LotzmUQll7JEFFsFwX
iG\niKynYH7nSHmq6uXbYo4iIp+p6gVu5/A7G0kZz1DVU93O8HsVcn9ereCHOUlWpIznhIyoQuWqamM3\n8hRT6AR6Hs5lE5e4lCWiWJEyXtQi5HFZnDkpz15qHjQO54JOFZFuOE0k9robKTLYnJTxheBtMZ59\np0xEPgGexhlBvYFTtP6mqn92M1cksJGU8RwRGcPhp3u1gHiX4hRXeVWdLSLDgedU9R0R+bvboSKB\nFSnjRStDHufh3MNX2PK8XnKKiFwH9AbaiUgD7BKEEmH37hkvmgWsUdU3cYpUd7z/TtkdQAfgdlXd\njTNpPsjdSJHB5qSM5wRXD7gXOAV4ChgMDPHrNUci8oGq/sXtHH5lIynjRQdVdSnwV2Ccqn6Lv6cm\nEtwO4GdWpIwXlRGRgcBlwMzgEihxLmc6GXa6chKsSBkv6oOzPMuVqpqN0yXmNgARKe9mMBN+fh5C\nmwilqpuA50K2p4cc/gSn7br5g7CRlPEbP7aHSnM7gJ/ZSMr4jSfnd0SkIs5yLUmq2l9EugNLVDVd\nVf/qcjxfs5GUMSXjDZwR06F1zpOAd1xLE0GsSBm/8erpXryqTgT2Q/48WgV3I0UGK1LGb1a7HeAY\nokSkEcHTURG5EIh2N1JksCvOjeeISB1gCFBZVXuJSG/gO1Xd6HK0YxKRpsALOLfG7AGWAf1V9UdX\ng0UAmzg3XvQK8DzwaHB7B86cT3e3AhXDucD1qrrN7SCRxk73jBdFq+onOD33UNVZeP//aiLwsYjM\nEZH+wdGgKQFe/4c3f0wHRKQHEC0i1UXkNjy+yqWqPq6q7YBrgQPAZBGZ63KsiGBFynjRzTg/7FWB\nz3CW4vX8AnLBa6U6Bj9qAkvdTRQZbE7KeNEDwCuqeovbQYpLRL7EKUz/BV5U1fkuR4oYVqSMFy0H\nHhKRZjgjqfdU1eunTv1VdYXbISKRXYJgPCu44kFP4G/An1S1nsuRjnJoQTsRSebwW3YCOA1Nk1yK\nFjFsJGU8KXjd0aXBjzxgvLuJChey4mbb4OoN+YIjQXOSbCRlPEdEFPgV+AD4wMvXHolIVaA68Bpw\nEwW37ZTBOU1t4lK0iGEjKeMZIlJeVfcB5wEpIftjAFQ1y61sx9EU6As0AV4K2Z8LTHUlUYSxImW8\n5HWcSw++pmB+59DIJA9nhU5PUdU5wBwRmaaqX4QeE5EbXYoVUaxIGc9Q1WuDD69W1e9DjwUv7vSy\ndBH5N1AluF0OqAG86V6kyGBFyniGiDQGBHhKRB7l8Pmd8UADl6IVxwvAAJxW67cDfwHsWqkSYEXK\neEkFoB3OgnFXh+zPBYa5EegEZKnqVyKyT1V/AH4QkU9xLu40J8He3TOeIyLNVXWV2zlOhIh8DLwM\nXAVsBtYCD6iqXYZwkmwkZTwjpNPvbBHx24WR1+LMQd0F9AdaAde7mihC2EjKmBIgIjcUsjsHWGv3\n8Z0cG0kZzwn+wJcF3gI+xnnH7FVVneRqsOM7F+gCfIlzuUQ34Hugioj8rKp3u5jN12ypFuNFt+O8\ndX81sEJV2wO93I1UpCpAC1Xtp6q3Aq2BU1T1QpylZszvZEXKeFGOqh7EmYSeFtx3iot5iqMeEBOy\nXQ44TUQSgDh3IkUGO90zXrRYRH4BVFWXisjdOPfyedkYYImI7MI53UsERuCcBj7rZjC/s4lz40ki\nUllV04KP6wHbVPWAy7GOS0QCOKuJBoBUVc1xOVJEsNM94zki0gb4QEQ2ichWnO4xjV2OdVwi0gJn\ngb6PVHUHcLeItHU5VkSwImW8aDzOhZB1VbUWTg++CS5nKsoLwL1AdnB7Jh5dA8tvrEgZLzoYvLUE\ngOB1Rl6flzioqmsObajqaoItuczJsYlz40XpIvIQMBtnfqcHsNPVREVLF5G+QKyInIVzg/EOlzNF\nBBtJGS+6CeeSg4HAYzj/T73e0urvQC2cxfoeBdJx/h7mJN
lIKsIFV7XsqaofBbevB2ao6h53kx1X\nFrAMZ34nD1gV3OdZqpopIh/hLNgXhZO7LfCNq8EigBWpyPcuzq0ah1QA3gEudydOsbyDc5o3P/j5\nFuBGoLeboY5HRP4HVMZZASF0NVErUifJilTkS1DV5w9tqOoUEfmbm4GKoY6qdgrdISJe/2GvfGRm\nUzKsSEW+DBG5C/gW5zTkXGCXu5GKtFBE2h9aQlhEzsC5WdfL5vpxHSw/sCvOI5yIVAIexJkfOYjz\nwz5eVTNcDXYcIrIZZxI6E4jGOUVNDR725LpSIvIzTqOIXTivsx/WwPIFK1IRTkT+FHwYOk+Cqnr9\n9KlQInKFqv7H7RwnQkQuV9UP3c7hV3a6F/lC1zEqC5wBLMK/E7r3AL4qUjhXoluR+p2sSEU4VT1s\nHabgJQmvuhSnJASKforn+DGzZ9jFnH88uYCfmwP4cX7Cj5k9w0ZSEU5EknF+SA79Ns8FJrqXyJgT\nY0UqwqlqtSP3iUhPN7KUED+eOvkxs2dYkYpwInIqcAeHt//uCtR1LdQxBBe3OyZV/RWPrXLpx8x+\nY0Uq8r0JvI7TC+5xnNthbnU10bG9j3NqWg6n3fo6nOukTgWWAGer6sfuxSuUHzP7ik2cR74Dqvo6\nkK6q76vqDRx+WYJnqGp7Ve0ArABOU9Xmqno60AT40d10hfNjZr+xIhX5AiLSFUgVkVtF5Fyc3/Je\n1kRVNx/aUNWNOD/0XubHzL5gp3uR73qgJs5FkI8Dl+DcJuNlC0RkIbAA51TqTGC5u5GK5MfMvmC3\nxfyBicgHqvoXt3MURkSaUnA9l6rqSjfzFIcfM/uBFak/MBH5SlW7u53jSMFuMTcAlQh5+15V+7oW\nqgh+zOwXdrr3x+bV31DTcDqtbC7qiR7ix8y+YEXKeNEmVZ3sdogT5MfMvmBFynjRDyIyBpiDszYT\nAKr6f+5FKpIfM/uCFak/tjS3AxxDreDn0En9PMDLP/B+zOwLVqQinIi0BobiXLOTB6wGHlfVVar6\nV1fDHdtQtwP8Dn7M7AtWpCLf6zhtyr/DedepEzAVZ/E7rzp0qwk4t5s0BBYD3dwKVAx+zOwLVqQi\nX6qq/jdk+yMR6edammJQ1fah2yJSA3jCpTjF4sfMfmHXSUU4EXkB54bXL3Bug+qC0x34Q/DPxK6I\nLAzeI+cbfszsRTaSinxxwc+XHrG/Fx6d2BWR7yk4dQoA1YHP3UtUND9m9gsrUpGv0And4DpHXnVV\nyOM8IENV090KU0x+zOwLVqQin18ndIcDbXCWO14kIkNVdZvLmYrix8yeZ0Uqwvl0QvdVnHXY78cp\nrN2C+y52MVNR/JjZF6xI/cGo6vbgtVNeFq2q74dsv+v1dyTxZ2ZfsCIV4Xw6obtfRHoBs3Ey9wD2\nuZqoaH7M7AtWpCKfHyd0++Is0DcIZ37ne+BmVxMVzY+ZfcGK1B+D3yZ0b1BVv/2A+zGzL1iRinx+\nnNBNEpHzcEYj+w/tVNUs9yIVyY+ZfcGKVOTz44TuJcAVR+zLw7l8wqv8mNkXrEhFPt9N6Kqq77qs\n+DGzX1iRiny+m9AVkXWF7M4B1gIDVHVxmCMVyY+Z/cKKVOTz44Tuy0A68BHOKdPFQDXgK5x1xM9x\nL9ox+TGzL1iRinx+nNC9SFX/FLL9iojMUtWRIuJaqCL4MbMvWJGKfH6c0M0WkeeAb3FOUdsB5YLF\nNtPVZMfmx8y+YEUqwvl0QvcqnB523XEm+9cCl+EsO3ONi7mOx4+ZfcGKVITz6YTuXar61KENEUkC\npqjqVcf5Grf5MbMvWJGKfH6c0I0TkbeAW3AW5xsEDHM1UdH8mNkXbPngCCci3xwxoUtwQreHiMxV\nVS8WKUTkKmAUsAroq6qpLkcqkh8z+4EVqQgnIjNxfmgOTei2x1nnfDjwgKpe6GK8wwSba4b+h2wB\nNAD+C6CqD7sQ67j8mN
lv7HQv8h2a0O1GwYTu5UAs3pvQXXnE9ipXUpwYP2b2FStSfxx5OCOpLGCf\nF09FVPVNABGpBVyqqpOD248Bb7gY7Zj8mNlvotwOYErdB0A94BtgDnAaMMPVREV7k8NbwC8P7vMy\nP2b2BStSka+sqj6sqv8OfjyAc9rnZRVU9V+HNlT1fzjLzHiZHzP7gp3uRSgRiQk+nBOyCkIezqT5\n127lKqaNIvIMzmR/FM7KDRvdjVQkP2b2BStSkWsVTlEKANcecSwPGBH2RMV3Y/CjJ86Fp/OBd11N\nVDQ/ZvYFuwTBeJKINAeqBDfLA8+qaksXIxXJj5n9wEZSEU5E1nP4dTwAuara2I08xSEik4CmwOnA\nQuBMYLSroYrgx8x+YRPnka8F0DL40Ra4F5jkaqKiNVfVrsAaVb0UOAto5nKmovgxsy9YkYpwqron\n5CNdVT/G200YAMqISEUAEammqpsArzc09WNmX7DTvQhXyG0bNYF4l+IU1ws4V8O/AGwWkR14v6Gp\nHzP7ghWpyLcbp/HCdpxidSFwnauJilYeGALsAn4B6gJfuJqoaH7M7At2uhf5zgU+AxS4CbgdeMbN\nQMXQH2itqi1UtTnQCPD6jbp+zOwLVqQi30FVXQr8FRinqt/i/RH0Zpw1sA5Jwbkx2sv8mNkXvP6f\n1Zy8MiIyEGcp28Ei0h5nSVvPCZk/2wssEZG5we2OwI9uZjsWP2b2GytSka8PznItV6pqtog0BG5z\nOdOxHFr25MjlTr4Pd5AT4MfMvmJXnBtjPM3mpIwxnmZFyhjjaVakjDGeZkXKGONp9u6eRzWoFJv3\nyy2eaeRyXI1f+ZTcfQdY0eF0t6MUW+uNziVNG1YvdznJCYpN8PqqqiXORlLGGE+zImWM8TQrUsYY\nT7MiZYzxNCtSxhhPsyJljPE0K1LGGE+zImWM8TS7mDPCRF12E4H6TYA8cv7zGmwKrrtWMZHo6+4t\neGKV6uT+byp5S+YS6HYZUW3/BDk55MyYUvA1YXDKXQ8R3awVkEf2+KfJ+bFgxZNAUnVihjwNZcuS\n89MasseOgECACg8MJqphYzhwgL1jnyD31w1hy3ukn35Zyx33P8RN1/6NPr17HXZs3oKFPPviRKKj\novjTOZ24s9/NLqX0NxtJRZKGzaBaTXJeGEDO9JeIviLkhyJjJzkThzofk4dDegp5qxZB9bpEtelM\nzriHyXlvElHN2oUtbnTrM4mqU489d1zP3qeHcso9jx52/JQ7HmTf9LfY84/rIDeXQFINypzTHeLi\n2HPHDc7X3PFA2PIeKWvvXp4Y/Qwd2xf+mo0YPZYXxozin6+/zLffLeCXdevCnDAyWJGKIFGntSJv\n5UJnY8cWiImD8hWOel6gfXdyl8+H/dkEmp1J7rJ5kJsLW9aT+9n0sOUtc+ZZHJjzFQC5G9cTiK8I\nMbHBkAHKtG7LwW9nA5D93FPk7dhOVJ365Kxx1pnL3bqZqBq1IMqd/8blypbl5fHPkVSt2lHHNm3e\nQqVKFalZozpRUVF0PacT3y1c5EJK/7PTvWMI9lCbAZwCzAKuBwJAC1XNFJFncFZlfBuYAjQEygJD\nVHWWiDQDXsRZSnY3ThOEBGAqkAm8qKr/LdHQ8QmwOeRULXOXs2/f3sOeFnVWT3ImPw5AIDHJGaX0\nGwTR0eR++AZs21iisY4lKrEqOT+tzt/OS08jKrEquVl7CCRUJi9rj3M62KQpB5cvZt+U8eSu+5ly\nV/dh/7+nElW7HlE16xColEBe2s6wZA5VpkwZypQp/EcoOTWVxMqV87cTExPZtGlzuKJFFBtJHdv1\nwFJVPQdYjVOgCnMtsE1VuwNXAOOC+18A/qGq5wIzgTuD+88ArivxAlWYQCGR6zchb8eWwwtXVBS5\nL48g97PpRF99R6nHOqbQuIEAUVWrs/+9aey5py/RpzWlzNldOLhgLjlrVhL7wuuU69WH
nI3rCv97\neo2tgPu72Ujq2JoCs4OPZx/7aXQCuojIOcHtCiJSDugAvCwi4PRkO7Tm9VpVTS3xtAAZO52R0yEV\nE2F32mFPiWrWjryfQu78z9zlFC2A9T9C4tGnLqUlNzWZqMSqBdmqJpGbmgxA3q50cn/bSu5WZ/Rx\n8IcFRJ3aGObPYd8rL7Iv+DVx//yfK6OooiRVq0pKSsE/8287kgs9LTRFs5HUsQUo6Px7MPg59Ndh\n2eDn/cCTqtot+HGaqu4HsoDuwX0dVfWekOeXityflhFo3dHZqH2qU7T2ZR/+pLqNyNu6oeBr1iwm\nIG2cjaTakF469bMwB7+fR5mu5wEQ1aQpuSk7YG+WczAnx5lzqlMPgGhpSu6m9UQ1akKFR4YDUKZD\nZ3J/WuPJUUqdWrXI3LOHzVu3cvDgQb6aM5fOHc9yO5Yv2Ujq2H4EzgLeB3oG92UANUVkHXA2sARY\nAFwO/FNEkoD+qjoAWIbTLfgTEekNJFPafdg2KGxeR/TdT0JeHjnvv0ygfXfYuyd/Qj1QsbIzV3XI\nrz9D07ZE3/0UADkzXi7ViKFyVi4j56fVxL70FuTmsve5pyh74WXk7cnk4JxZZL8wmgqPPQFRUeSs\n+5mD337tfGFUFLGTp8H+/WQ98ejxv0kpWrl6DU8/N54tW7dRpkw0n305ix5du1CnVi3O69GNYY89\nwgOPDQbg4vN7cmr9eq5l9TPrFnMMIpIAfABEA3Nx5p6eBB7A6QacCnyDMxE+CWgWfO4wVf1ERJri\nTKjn4vRkuxaoCLynqkW+z2+L3pUuW/TOP2wkdQyqmg50BxCROOBaVX0ZKGyocUshX78G6HLE7p1A\n+C5EMiYC2JyUMcbTbCRVDKqaCTRwO4cxf0Q2kjLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSlj\njKfZJQheFVeRqL/fW/TzvODdeUTFB6j45ltuJym+LhcDkLctfKuQloRA4zPdjhB2NpIyxniaFSlj\njKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKfZxZwRZuTr01n20zoCgQAD+l5Dy8an\n5h/7cuFSJr33X8qVLcvFndtz3cU9ABjz1nv8sOZncnJy6HflxZx/dtvw5Z38JkvX/EwgEGDgbTfS\nUhoX5P3ueya+M8PJ260TfS67kL3Z+3hs7EukpO9i//4D3H7tlXQ/K7wXOI6c8jZL9WcCBBj4jxto\n2aRRSOZFTJz+H8qVLcPFf+pIn0svcDI/N6kg89/+QvcO4XuN/c6KVBFE5CachqAPFnKsHlBDVReK\nyDjgeVVdH+6MhyxcpWzc9hvvjnyMtZu3MXDCG7w78jEAcnNzGfHKO7w/ZjAJ8bHcOuJ5zj2rDRu3\n7uDnX7fw7sjHSNudyV8ffCI8WTlQAAAgAElEQVRsRWrh8tVs2LKd6eNGsPbXzQx4dhLTx43Iz/vE\nhNeZ8eIoEirG0W/QSHp2bM/i1UqLJg25pdflbPktmb4DngxrkVq4Yg0btm5n+tjHWfvrFgY8P5np\nYx8vyDzpDWaMf4qE+Dj6DX2anh3bsXj1T7Q4rSG3XHUpW3Yk03fgSCtSJ8CK1MnpAcQBC1W1v9th\n5i//kXM7nAFAozo1ycjMIjNrL3ExFUjbnUl8bAyJleIBOLtVU75btobLunak5WnOaKtiTAxZ2fvI\nycklOrr0ZwLmL11Jz47Oku+N6tUhI3MPmXuyiIuNIS1jN/FxMSQmVASg4xktmLdkBVee3y3/67cn\np1KjamKp5zx25tpO5qws4mKCmWNjSawUzNy6BfOWrOTK87q6mtnvrEgVk4g8i9Pw8xSc7jAfAsOA\nAyLyK3A/cBdwFVAJEKAR0B/4BaerTEfgVGA60FFVD5RkxpT0XTRvVD9/O7FSHMnpu4iLqUBixXj2\n7M1mw9bfqJ1UhYUrlfbNmxAdHUVMdHkA3v9yLn9q
2zIsBQogOS2d5qcVnI4mVqpIclo6cbExJFaq\nyJ6sbDZs2Ubt6tVYsGw1HVo1y39u7/sG81tKKpOGPxKWrIdlbnxk5l3ExQQz791bkHn5ajq0bFqQ\n+YGh/Ja6k0lDHwprZr+zIlV8G1T1fhGpgNOF+BUReQNIUdWPROT+kOfWVdWLReRC4DZVvUJEPgH6\nAhcA95Z0gSpMaLeyQCDAyLv/zqCX3iAupgK1k6oe1ur0y4VLeX/WXF4Z7N6AMLS9WiAQYNSDdzDw\n2UnExcZQp0a1w46/+9wTrFm7gYdGv8iHE0cTcKnV+lGZ77+dgc9PIS4mhjrVqx32b/Du2OFO5mcm\n8OGLo1zL7DdWpIovUUTm4XQgLqpf9tzg5804oyqAkcC3wDJV/bY0AiYlJpCSXtD4c8fOdJIqF7Rd\n79BcmDrCGXk8O3UGtZKqOGGXrGTy+/9jyqD+xMfGlEa0Y+StTPLO9JC8aVRLrFyQt1Uzpo11uhWP\nfe0dalevxsqf11EloSI1q1WlaaMG5OTmsHNXBlUSKh3155dK5iqVSQ59jVPTqBb6GrdsyrTRQ53M\nb7xL7epVg5krUbNaFSdzTm5YM/udXYJQPGfizD91VdVuwL4inn8w5PGhX5cxwcfVSzxdUOfWzfjs\nux8AWLVuI0mJCcRWOCX/+K0jnid1VwZZ2fv4atEyOrVqyu49WYx56z0mDribhPjY0opWeN4zWzFz\n7gIn78/rSEqsTFxMhfzj/QaNJDV9F1nZ2cxe8AOdzmjJohVreP39/wKQkpZO1t5sKleMD1/mM0Iy\n/7KepCpHZB7ydEHmhYvp1KYFi1b+yOsf/C+Y2TkWzsx+ZyOp4mkAzFPVAyJyGRAtIuVwuhMX9zUc\nCQwFLhKRa1R1ekmHPOP0xjRvWJ+/DRhFVCDA4H7X8sGsb4mLrcB5Z7Xlqp5duOXx5wgEAtx65UVU\nrhjPv2Z+Q9ruTO4bOzn/zxl1d19qVatS0vGO0raZ0Py0hvS+bzBRgQBD7urLjJmziY+N4bzOHeh1\nYQ9uHvCkk/eaK6hcqSK9LzmPgc9N4roHhpK9fz+D77yZqKjw/a5t26wJzRufSu8HhhIVFWDI7X9n\nxudfO5k7tafXBd25edAoAgG4tddlTuaLezLw+Slc9/BwsvftZ/DtN4U1s99Zm/UiBC9BaA10xmmX\n/h+gE5ABvAu8CTwE9KNg4jxFVV8UkRbAi8AjOO3XLxKRKsDXQCdVzTjW921Qu2beus9KvI6VioYX\nXAOBAOvn/J/bUYrt1OCid+tnf+xykhMTaHzmH24iy0ZSRVDVNwrZ/VzI41rBz9OCn1eGfO1KoFtw\n86LgvlSgRYmGNCaC2ZjTGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjjaVakjDGeZkXKGONpVqSMMZ5m\nF3N6WKBsObcjFF8gANFl3U5x4vJy3U5gimAjKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKdZkTLG\neJoVKWOMp1mRMsZ4mhUpY4yn2RXnEWbkK9NYqmsJBAIM7HcdLU9rmH/sy/mLmfivjyhXtgwXdzmL\nPn8+D4Axr09n0WolJyeXW6/6M+d3ahe+vBNfY+man5y8d/SlpZxWkHfeQiZO+zflypbl4m7n0OeK\ni/OPZe/bx6X9+nP7db248oIeYcsLMPLlqSz98Rcn8619aNmkUUHm+T8wcfp/nMxdzqbPpeezN3sf\nj42bTEp6Bvv3H+D23lfQPdhp2hTNRlLHISI3icgzxzhWT0Q6BB+PE5FTC3teOC1c+SMbtv7G9DFD\nePLumxkxZWr+sdzcXJ6Y8jZTht7P1JED+Or7pWxP2cn85Wv4+dfNTB8zhJeHPcjIV6Yd5zuUcN5l\nq9iwZRvTx4/iyfvvZMSEVw/P++LLTHlyEFOfHcFX8xexPTkl//jEae9RKT4ubFnzM69Yw4at25k+\ndhhP3nsLIya/
fXjmSW8yZdhDTB01iK8WLmF7SipfLVxCi8YNmTpqEM89ejejwvgaRwIbSf1+PYA4\nYKGqutf2N8T8ZavpeXZbABrVrUVGZhaZWXuJi6lAWkYm8cH25QAdWzVj3rJVXN6tM62aOKOtirEx\nZO3bR05Oblharc9fspyenTo4eevXISNzD5l7soiLjSFtVwbxsbEkBhtodjyjJfMWL+fKC3qw7tfN\nrP11E13POrPUMx6Vedkqep7tfN9GdWuTsWcPmVlZxMXEkJax+/DXuHVz5i1dxZU9/5T/9duTU6lR\nNTHsuf3MilQxiMizQAfgFGAS8CEwDDggIr8C91PQzqoSIEAjoL+qfiIiDwSPRQH/p6rDSyNnclo6\nzRs1yN9OrBRPctou4mIqkFgpnj17s9mwdTu1k6qyYMUaOrRsSnR0FDHR5QF47/Ov6Xpm67AUqPy8\nIadKiZUqkpyWTlxsDIkJldizdy8bNm+ldo0kFixbSYdWzQF4evIbDL6rHx98/lVYch6eeRfNGxcM\nmhMrHnqNneK0Z282G7Zsp3b1qixYsZoOLZvmP7f3g8P5LXUnk4Y8EPbcfmZFqng2qOr9IlIBWKuq\nr4jIGzj99T4SkftDnltXVS8WkQuB24BPgvvPwWkmuk5Enjtez72SEtpTMRAIMKp/PwaOf5W4mArU\nqV7tsONfzl/M+198w6vDHyrtWMd0VN6H7mHg2AnExcZQp0YSecB/Pv+KNs2EOjVLrRH0CQntWhkI\nBBh13z8Y+PwUJ/MRr/G7zwxlzbqNPDR2Ih++8BSBwB+uhd7vYkWqeBJFZB6wH6hWxHPnBj9vxhlV\nAWThNAQ9CFQFEnGai5aopMTKJKfvyt/esTOdapUr5W93aHE600YNBGDsm/+idlJVAOYsXsGkf3/E\ny8MeJD42pqRjHTtvlcok70wryJuaRrXEygV5Wzdn2nNPOnlfnUrt6kl88e18Nm37jdnzF7E9JZVy\nZctSo1oVOrVtHZ7MiQkkp6UfnrlyQkHmlk2ZNnqIk/mN6dROqsbKX9ZTpVJFalarQtOG9cnJyWHn\nrgyqJFQ66s83R7OJ86KdiTP/1FVVuwH7inj+wZDHARGpj3M6eGHw6zeWRkiAzme0YOa33wOwau0G\nkhITiIupkH+837BnSE3PICt7H7O/X0qnNs3ZvSeLMa+/y6TB95MQ5onozme2Yeac75y8P68lqUrl\nw/MOeILUtHSy9mYze/73dGrbiucGPch7E8Yw/YWnueqintx+Xa+wFSiAzm1bFrzGv6w/OvPQ0aSm\n7yIrO5vZC5fQqU0LFq38kdc/cLo7p6TtIit7H5Urxocts9/ZSKpoDYB5qnpARC4DokWkHM6pW3Fe\nv6rADlXNFJG2QH2gVFaza9v0NJo3bkDvh58gKhBgyG03MOPLOcTHVOC8ju3odX43bh46hgBw61V/\npnLFeKZ/+hVpuzPpP3pC/p/z9H23UqtaldKIeHje5qfT/LRG9L73MSfv3f2Y8dks4mNjOO+cs+l1\n0Xnc/OjjBAIBbu19JZWDE9Juatu0ifMaPzicqKgAQ267kRlffOO8xp3a0+uC7tw8+Gknc69LqVwp\nnt4XncvA8S9z3cOPk73/AINvu5GoKBsfFFcg9JzZHE5EbgJaA52BvcB/gE44p2rvAm8CDwH9KJg4\nT1HVF0WkBfAicC7wfzjvBM4FooE2qtrzeN+7Qe2aeetnfVAKf6uSd2qPv0BUFOu/nel2lGI7tZNz\njdj6rz50OcmJCZzW/g83kWUjqeNQ1TcK2f1cyONawc+HLnxZGfK1K4Fuwc0LSjqbMX8UNuY0xnia\nFSljjKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKfZxZxeFQhA+QpFP88LAgHIy4MD\nRd3W6CGHViCIdf9WG3N8NpIyxniaFSljjKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4
ynWZEyxniaFSlj\njKfZxZwRZuSkN1j6488ECDDw9ptoKY3zj30573sm/nOG02a9a2f6XH6h0wL8mQmkpO9yWoBf+1e6\nnx2+ppsjp7wVbFkOA/9x4+Ety79bxMR3P3BalnftSJ9LL3DyPjuxIO/frqT7WW3Dlhdg5IRXWLpa\nnTbrd/Wj5ekhreHnzmfi1H85mXt0oc9f/syCpSvoP+xpGjeoB0CThvUZfM8/wprZz6xIHUdwjfMW\nqvpgIcfqATVUdaGIjAOeV9X14c4YauHy1WzYsp3p455k7a+bGfDsRKaPc1pC5ebm8sSE15gx4WkS\nKsbRb9BIenZqz+JVSosmjbjl6svZ8lsyfR8bEbYitXBFMO+zj7P21y0MGDeZ6c8+XpB34uvMeGEk\nCfFx9BvyND07tmPxqp9ocVpDbul1mZN34FNhLVILl65kw+atTJ8whrUbNzFg9HimTxhTkHn8FGZM\neY6EivH0e2Q4Pc85G4D2rVswfvijYcsZSaxI/X7ea7O+ZAU9O7UHoFG9OmTsDmlbnrGb+LgYEhOC\nLcDbtGDekhVceX63/K8Pdwvw+UtX0bNju2De2k6b9cNalsce3rJ8yUquPK9rQd6U8Lcsn794WX7h\naVS/Lhm7Mw9vDR8X0hq+bSvm/bCM2jWSwpox0liRKoYSaLN+dfA5B4EfVPXe0siZnJZO89Ma5m8f\n1rY8vwX4NmpXr8aCZavo0KpZ/nN79x/EbympTHo8fL/tk9PSD29ZXime5J2hLcv3FuRdvvrwvA8M\n4beUnUwaFt6Oy8k70w5vDZ9QieSdaQWt4bNCWsMvXUGHNi2pXSOJtRs3cfvAEezK2M2dN/amc7sz\nwprbz6xIFc/vbrMuInOAp3DaWGWKyMci0l1Vvyrt0Hkc0bb8wTsZOHbiYW3LD3l33AjWrN3AQ6Nf\n4MOJY1xpAR7aXS0QCDDqgdsZOG4ycTEx1KlxRMvysY87ecdM4MMJT7vWsvyo1vCP9mfg6PHB17g6\neXl5NKhdiztv6M1F3c9h09bt3Hj/QD6bOplyZcu6ktlv7N294jnUZv0TTrzNehPgZ1XNDO6fDZTK\nr9GkKpWPbgEe2ra8VTOmPfs4k594lLjYGGpXr8bKn9exbUcKAE0bNchvAR4OSYlH5N2ZRrXE0Jbl\nzZg2ZhiThz9MXExI3uTUkLy5YcsLkFQ1keSdoa/xTqpVCXmN27Rg2vhRTB45hLi4GGrXSKJ6tSpc\n3KMLgUCAerVrUjWxMjtSUsOW2e+sSBXtpNqsA3nBz4cc6n5c4jq3bc3MOfMBWPXzuqNbgA98qqAF\n+Pwf6HRGSxatWM3r7/8XgJRgS/NwtQDv3LYVM+cucPL+sp6kxCPyDh4V0rJ8cUHL8hkhebPDlxeg\nc7szmPnNt07mn9aSVCWRuJiYgsyPDCtoDT/vezqd2YaPP5/Nq9OdRq/JO9NITUsnqWrpd4iOFHa6\nV7QGnFyb9Z+A00QkXlV3A12BEaURtG1zoflpDendf5DTAvzOm5kxc7bTtrxzB3pddC43PzYi2Lb8\nCipXqkjvS85n4LMTue7+IWTv38/gu24OWwvwts2aOHkfGEJUIIohd/ydGZ9/7eTt1J5eF/bg5kEj\nnbbwV1/u5L24JwPHTea6h4aRvW8/g+/4e1hblrdt0ZTmTRrT+66Hndbw997GjE+/dDJ36UivS87n\n5oeGEgjArddeReVKFeneuQMPjhjLrG8XcODAQYb2v91O9U6AtVk/jpJos66q3UTkSuABnMI2V1Uf\nK+p7N6hTK2/93E9K/i9VCk495yIA1n/9X5eTFN+p3S4FYP2CWS4nOTGBWvKHa7NuRcqjrEiVLitS\n/mFzUsYYT7MiZYzxNCtSxhhPsyJljPE0K1LGGE+zImWM8TQrUsYYT7MiZYzxNLstxqsCAShT3u0U\nxRMIwMED5G5b53aS4gs4v58Dp4Tvvj/z+9
hIyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwx\nnmZFyhjjaVakjDGeZhdzRpiRL73C0tU/OW3L7zyiBfi3C5g4LdgCvHsX+lxxidMC/PHRBS3AT63P\n4LtvDVveUe/+j2VrNxEIBHjsb5fQ8tQ6+cfemTWfj79bSnRUFM0b1Oaxv11C1r79DHj1PVIyMokp\nX44n+/6VapXCe0HmU+MmsGzVagIEGHDfXbRqdnr+sS++mcvEN6ZSrmxZLunZgz69/sKerL088vhI\ndu3ezYH9B7jz5hvocnaHsGb2MytSRSis1bqIbAjuyzzGlx35ZzQA3lPVdqUQMd/CZSvZsHkb018c\n7bQAH/MC018cDQRbgL8wmRmTgi3AHxtOz85nAdC+VXPGDwt/C/DvdT0bf0vlnwNvY+3WHQx6fQb/\nHHgbAJl7s3nt0zl8OvJ+ykRHc8vY11m29leWrt1E3WqJjLvjWhb9tIEX//MFw2/8S9gyL1y8lI2b\nNjP95Qms3bCRAU+OZvrLE4Dgazx2PB+8MYWEShXpd/8j9Ox6Dl98PZdT69XlgTv68VtyCjfedT+f\nTn8rbJn9zk73Isj8xcvyC0+j+nXJyHRagAPBFuBxJCZUIioqio5ntGbeD8vcjMv8NWs594ymADSq\nlURG1l4y92YDULZMNGXLRJO1bz8Hc3LI3n+ASrExbPwtJX+01a5JA374eWNYM3+3aDE9u57jZG5Q\nn10Zu8ncsweAtPRdVIyLI7FyAlFRUZzdri3zvv+BygmVSM/YBUDG7t1UDrZhN8VjI6niOVVE/g+o\nCzx3aKeItAYmAAdwOsH0UtWdIvIwTueYXOAxYH3I11wE3A1cqqo5JRkyOS2d5k0a528nViqiBXjr\nFgUtwAeNYNfuTO68vjed27UpyVjHlLJrN83q18rfrhwfS8quTOIqnEL5smW547IenP/IWE4pV4aL\nOrSiQY2qNKlTg29W/MT57Vrwva5nW2r6cb5DKWTeuZPmpzfJ306snEBy6k7iYmNJrJzAnqwsNmza\nTO2aNVjww1I6tG3Drdf/jRn/+5TzrrqOjN2ZTB47MqyZ/c6KVPE0AdoCFYFlwKHikgTcrapLRORx\n4DoR+RSnQJ0NNAQeJdhnT0QaA4OBi0q6QBXmqDbrj9zLwDEvOC3Aa1Ynj5AW4N3OYdO27dz4wCA+\ne2uSO33hQhoXZe7NZsr/vuaTp+4jtkJ5+o55jR83bePKLmeim7fTZ+QU2kkDEivGhT9niKParA9+\nlAFPjiY+NpY6tWpAXh4ffvo5tWok8eq40fz48y8MeGoMM16f7GJqf7EiVTxzVfUAkCoiGUC94P7f\ngKdFJAaoBUzDaaG+QFVzgV+AW4JzUrE4fftuUNVdpREyqUoiyTvT8rePagHeugXTnnd+i4995S1q\nVw+2AO/eBYB6tWpStXJldqTspE7N6qUR8TDVEiqSsqtgWm9Hekb+JPjabcnUrVaZyvGxALRtUp9V\nG7Zwet2aDL3+cgD2ZO9j1pI1pZ4zVFLVqqSk7izInJJKtSoF3Yg7tG3DO5PGAzD2pZepXbMGC5cs\n45yz2gNw+mmN2ZGSSk5ODtHR0WHN7lc2J1U8RzYnPLT9PPC8qnYFDv1qzKHw17UOMAe4o1QSAp3b\ntWHmN/OAY7QAf3R4QQvw74ItwL+Yzav/OrIFeGJpRTw8b/PGzPxhJQCrN24hKaEisRWc5WlqV6nM\n2m3JZO8/4Px9NmyhfvWqfL1cGf/B5wB8PH8pXVo2KfwPL63MHdrx2VffOJn0J5KqViEutuA1vuW+\nR0jdmUbW3r189e08OrY/k/p1arNslVNMt2zbTmyFClagToA1By1C8N29e4F2QCKwKHioOfAdcCXw\nK/AJMB+YgjNiagdUASYB9wHvAe2BWcBIVZ15vO/boG7tvPXffXnCece+/CbfL19FVFQUQ+75B6t/\nXkd8XA
znndORmXO+46W3pxMIQN9eV3Bpz25kZmXx4JPPsjtzDwcOHuDOG3rT9awTexPy1I7nwsED\nrP33Syec99n3PmPRTxuICgQY1OdS1mzcRnxMeXq2bc702Qv54NvFlImKok3jejzY60Ky9x+g/0vv\nkL5nL5ViK/DMrdcQH3PKCX/fRtfeB8CGpQtO+GufeWkKi5YsJxAVYOiD97L6p1+Ij43lvG5dmDn7\nGya89hYBAvS97mouu+A89mTtZcCTT5O6M42DOTnce2tfOrZre8LfF4DEWn+45qBWpIoQLFIXAOWB\nxsBonDmmFsC1OAVsLfA68CJwMdAT+CsQAAbgTJy/p6rtRKQR8DFwlqruPtb3/b1Fyg0nU6TccjJF\nylVWpIxXWJEqXVak/MPmpIwxnmZFyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjj\naXaDsZdF+ei6vbw82L/P7RQmAtlIyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjj\naVakjDGeZhdzRpiRE15h6WolEAgw8K4j2qzPnc/EqcE26z260Ocvf3barA97uqDNesP6DL7nH2HL\nO+rfn7Fs/WanzXqvC2jZoHb+sXdmf8/HC5c7bdbr1eKxqy9gR/puBr39EfsPHiQnN49Hrzqf5iG9\n+8LB2qyHlxWpCLJw6Uo2bN7K9AljnDbro8czfcIYINgCfPwUZkwJtll/ZDg9zzkbgPatWzB+uAtt\n1n/awMYdO/nnwzezdlsyg97+iH8+fDMAmXv38drn8/j08bspEx3FLeOnsmzdZj5bsppz25zONV3O\nZMnaTTz/0VdMufu6sGW2NuvhZ6d7EWT+4mX5hadR/bpk7D6yzXpsQZv1tq3cb7Ou6zm3tQDQqGY1\nMrKyydzr3FpzeJv13GCb9QpUjoshPdP5O2Vk7SUhLuaYf35psDbr4WcjqZMgIpVwWlVVAP4P6AcM\nwmmjngOsUtVbRWQBcK2qrhWROsCHqnpmSedJ3plG8yaN8rcTE4pos96mZUGb9YEj2JWxmztv7E3n\ndmeUdLRCpWTsoVm9kDbrcTGkZGQSV6E85cuW4Y5LunL+4PGcUrYsF7VrToPqVbixx9lc8/SrfLRg\nOZnZ+5j6wN/DkjU/s7VZDzsrUifnBmC1qt4rInfgtLCKBS5U1XQR+UZEWgJvA9cATwGXAf8MR7ij\nWoA/2p+Bo8c7bdZrVCcvL6TNevdz2LR1OzfeP5DPpk52p816iMy9+5jy6Vw+GX4XsaeUp++4t/hx\n83ZmL/+JC85sxm0XdWH2ip8YM+Nzxv/jatdyWpv10meneyenKfBt8PFHwc87gQ9F5Ovg8So4RenK\n4PE/U0pFKqlqIsk70/O3j2qz3qYF08aPYvLIIcTFxVC7RrDNeo8uBAIB6tWuSdXEyuxISS2NeEep\nVimOlIzQNuu7qVYpDoC125OpWzWBynExlCsTTdvG9Vj16zYWr91El2bOaLHT6Q1ZuXFrWLIeUtw2\n65PHjiQ+No7aNWuwePnKQtusm+KxInVyAkBu8HEeUA6YAFwTbL2+AEBVU4HNItIeiFLVLaURpnO7\nM5j5jVMzC22z/siwgjbr84Jt1j+fzavTj2yzXqXQP7/E8zZtxMzFqwFY/es2khLiiT3lUJv1BNZu\nTylos75xK/WrJVIvKZHlG5yXb8XGrdRPCk9L+PzM1mY97Ox07+SsxWmn/h5wERAPZKjqdhGpGzxW\nLvjct3EK2JTSCtO2RVOaN2lM77seJioQYMi9tzHj0y+Jj43hvC4d6XXJ+dz80FACAbj12quoXKki\n3Tt34MERY5n17QIOHDjI0P63h+1U74xGdWlerybXjnnNabPe+yI++G4p8RVOoWeb0+l7Xidueu4t\nykRH0aZhHdqdVp/6SYkMevtjPv3BKW4Drr4wLFkPaduqBc1Pb0Lvfnfl
t1mf8b9P89usX335JfTt\n/xABAtx6w7UkJlTimisuZcCTT9Pn9ns5mJPDsIfvC2tmv7MOxidBRKoCHwIHgM+BvsAcoDmwDFgN\n3Ay0wRl1bQcaqmp6oX9giAZ1a+etXzCrlJKXrFPP6gEH9rP2nXFuRym2Rjc5l1xYB2Pvs5HUyYkF\nHlfVz0SkI9BVVW864jnPAohId+Dj4hQoY0wBK1InZxdwv4gMwRkp3VPYk0RkOHAB8NcwZjMmIliR\nOgnBUdEFxXjeUGBo6ScyJvLYu3vGGE+zImWM8TQrUsYYT7MiZYzxNCtSxhhPsyJljPE0uwTBlIwy\nZYmq26To53mGc6dF3sH9Luc4MX+4y82xkZQxxuOsSBljPM2KlDHG06xIGWM8zYqUMcbTrEgZYzzN\nipQxxtOsSBljPM0u5owwfmuzPvLlqSz98Rcn7619aBnSN/DL+T8wcfp/nLxdzqbPpeezN3sfj42b\nTEp6Bvv3H+D23lfQvUN4+gTmZx4/iaWrfyQQgIH33E7LplKQec48Jr71TyfzuV3p89fLAfh45ixe\needfREdHc8/NN9Ct01lhzexnVqROgoikqGrVI/bdBOxS1Q9E5CpVfS9cefzWZn3hijVs2Lqd6WOH\nsXbTFgaMe5npY4cV5DvlZIQAACAASURBVJ30JjOeH0FCfBz9ho6hZ8czWbz6Z1o0bsgtV/2ZLTtS\n6DtoVFiL1MIly9mweQvTJ41j7YZfGTDqWaZPGleQedwEZrwywWmz/uAgenbpRPny5Xnx9am8/+qL\nZGXt5YXX3rYidQKsSJUwVX0DQETKAffjdJIJi2O1WY+LjTmszTqQ32a9do2kcMU7Ou+yVfQ822nk\n3KhubTL27CEzK4u4mBjSMnYTHxtDYqWKTt7WzZm3dBVX9vxT/tdvT06lRtXwtrSa/8MSenbp5GRu\nUI+M3U6b9bjY2OBr7LRZB+h4ZhvmLVrCKeXL0andGcTFxBAXE8MTD/cPa2a/syJVBBH5Eaf7SwBI\nA7qr6iIR+QyoIiKPA+cDqcClwBAgBacxaEsReQmn7foUoCFQFhiiqiXeCsZvbdaT03bRvPGpBXkr\nxpOctou4GKc47dmbzYYt26ldvSoLVqymQ8um+c/t/eBwfkvdyaQhD4Qla37mnWk0l4JT6MSESiSn\npjlt1g+9xpu2ULtmdRYsWUaHM1oBsDd7H7c/OpSM3bu56+/X0zFMr3EksCJVtB+AFjj98xYBHUVk\nMVAd5y7V91R1iIh8B7QK+boxwFmqeoeIXA9sU9Wbg22wZh3x3FLhtzbroc3VAoHA/7N35nFVVfv/\nfg4qJqAoIGJYaaWrxAnHKzhl2mB5K9Oy8t4Gm8ix0sqxUsshZ1HMLLuVFjev1a97b2WlVk44omn1\nqUzLIRUVxQnN9PfH2sAREfV+4bDP8fO8XryEs/fZ571X8WbtxeY8jHryMQZNmmHzVql82vm8N/Z5\nvv/lV/qPS+GjKS/j8ZTMn956G+E8Hg+jBvZj0KhxhIWGUq1qTO72/VlZJL/0PDt27eL+3s+wYO7b\nJZbZ39Df7p2br4C/AInAFKAZUBdYgxWBrnf22w6En+UYCcDtxphF2Mu/cs7lYJHib5r16IiKZGR6\n582ksnOpBNC07rXMHjOUV5/vR1hICLHRldnw82Z+z7D5rr3yCv7880/2HcjySV6A6KhIMvZl5mXe\ns5fKXpecTePrMXvqeF4dM5yw0FBiY6oQWaki8XVqU7p0KS6PvZTQkHLs23/AZ5n9HS2pc7MIW1J/\nwQpAw7GFtRA4kW/fs/1oPA68JCJtnI+aIlLk7xHid5r1hnWZv2SlzfvzZqIjKxEWUi4v7/Nj2Lv/\nAEeys1m0Yi0JDeqwasMPzPrgvwDsyTzAkexjVKpQ3id5ARKbNGT+om9sZvnJata9x7jfIK8xXk5C\n43haNG1E2pp0Tp48SeaBLI4czaaS
s9amnBu93DsHIvKjo0z/Q0QOGmN2ArdjbcWFcZK88U0DbgPe\nNcZEA31FZGBRZ/U3zXrDa2sRd3V1uvZ7kaAgD0Mfv595X3xN+ZBytE9oQpcbr6P7kNF4PB4e7dKR\nSuHl6Xrz9Qya/Br3PTOM7ON/MOTx+wkK8t3P2oZ144gzNema1JcgTxBDn+rBvP/Op3xYKO1bJdKl\n4810f2qAzdytK5WcX1Tc0KYldz/eB4DBfZ/waWZ/RzXr54ExZg720u5xY8zDwDMiUsv7FgRjzFwg\nGWiDXTh/Fata3wjcA0wHagOlgBdE5JPCXtPvNOunTrF54UclHeW8qXH9HQBsXrW4hJNcGJ7o6hfd\nQpaWlEvRkipetKT8B51zKoriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRXoyWlKIqr0ZJS\nFMXV6J/FKEXDnyc4lbGtpFNcAPaeSE8p/RZwOzqTUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0pRVFc\njZaUoiiuRktKURRXoyWlKIqr0TvZAgy/06y/+zHrNm3FAwy8tyN1r7wsd9vsL5fx8bK1lAryEFe9\nGgPv7ciRY8cZMPOf7D1wiHJlg3n54S5UDvediAHg5UnTWLfhOzweDwP79qBe7Wtyt33x9RJS3pxN\ncHAZbml3Hd063877H/+X//fpF7n7bPhBWPvlf3ya2Z/xm5IyxrQBeopI55LO8n/FGLMFqCMih4ry\nuH6nWf/hF37dtZf3Bj/Bph27GfTGXN4b/AQAh45m88YnX/HZ6P6ULlWK7mNfJ33Tb6T//CuXVY5k\nUo9urPpxM1M++JxhD3TyXea16/h16zZSX0tm05ZfGfjSK6S+lgw4Yzx+Ch/Mmm41608NoF2rRLp0\n7ECXjh1yn//Jl4t8ljcQ0Mu9AOJsmnXgNM16UFBQrma9JFn+/c9cH18bgKsujSbr8FEOHc0GoEzp\nUpQpXZojx45z4s8/yT5+nPDQcvy6ay/1rqwGQONaNVjz0xafZl62ag3tWiXazNWv4MDBQxw6fBiA\nzP0HqOBo1oOCgvhL43iWrlx92vOnvvE2Tzz4N59m9nf8ZiblEGaMeQeoD7wPfABMxeqjDgL3Y83A\nfbBOvIbAS8BNQDzQX0Q+NMbcBTzl7LNaRPoYY+KBacAx5+Nu4E9gFlAJO1a9gG1YF1+C89hioAWQ\njjM7MsaMBTYA84A5QCgQAvQSkRXFNTj+plnfc+AQcVfE5uUtH0rGgYOElbuEsmXK0OO267nhmTGU\nLVOGDs3qUSOmMrWqxfDVeuGGxnVZ8cMv7NiTWcgrFEPmvZnEmVp5mSuFk7F3n9WsV6rI4SNH2LJ1\nG7FVY0hbk07T+Pq5+67/7geqVqlM5ciIgg6tnAV/K6nawDXYGeBmrD6qv4ikGWP6YctpIdDA2a8V\nMBuogZV79jLGfAG8DDRwCuVjY8x1wB3ANBF52xjTFogBugCfishMY0xtYJKItDfGjAeeA8oBL4vI\nfmNMQXljgJlOMbYFngXuLIZxKRD/06zn5T10NJtX/72QT0b2I7RcWR4c8xo//LaDO1s1Rrb9zn0v\np9DEXElEhTCf5/TmDM36kGcZ+PIrlA8NpVrVqqe54+d+/F/u6HCj70P6Of52ubdGRI44azkeoLaI\npDnbFmJnSwDrROQY8Dvwo4gcBnZh7cO1gJ+81oMWOc/7CBhijBkO7BaRH7CzpccdPfo08jTq/wCa\nANeKyJxC8u4C7jTGLAZGA8WqBvY7zXrF8uw5kLcst3v/QaIds++mHbu5rHIElcqHEly6NI1q1mDj\nlu0Ely7NC3+/g9kDk3jkljaUK1vktvrCM0dFsmffvrzMe/ZSOTLvP2vT+PrMSZnEq2NfpnxYKLFV\nq+RuS1u7jvi6cT7NGwj4W0nl15p7E4y97Mu/n/fnHuzPNm93WTBwUkS+xBbPD8A/nNnVcewlWo4e\n
vanznNLYy7dwY0zOlMNbYJjzWF9gu4i0AJLO5wT/L/idZj2uFp+t+tbm3bKd6IrlCS1XFoDYqEr8\n8nsG2cf/AGDDlm1cERPFV+t+YNK8+QB8vGwtreoWOIMtvszNGvPZwq9tZvnRatZD88b44aeeY+++\nTI4cPcrCxcto3qQRALsy9hBarlyJzFD9HX+73MvPBmNMcxFZBrQGVp3Hc34EahpjyovIQed5I4wx\nPYH/iMhsY4wHO7tKwyrVlzmXezeJyHjgaSAVe7n3FHaWlAVUNcb8gr20XAtEAeud170DW4jFhr9p\n1uNrXkFc9VjuGTGNoCAPQ7rdxgeLVxFW7hLaN6rDQze14v7RMyhdKogGV19B41o1yD7+B3MWLOPu\n4VMJDw1h3OP3+CRrDlazXouuj/bCExTE80/3Zt5/PqV8WBjtW7fgrr/ewkNPPosHD4/+/R4iHM16\nxt59RFSq6NOsgYLfGIzz34JgjNmDXXOaip3FZAIPYhfLe4pIZ2NMHSBZRNrk+7wTtmhOAotFZIAx\n5iZgBHAAu3D+IHAEeBOIxurRewMZ2EX7BOxMdAVwG3CDc0wB9gJfYxXrbwFbsQr2icBw4HnOcQuC\n3xmMT/zBL/96taSjnDdXdu0NwJa1y0o4yQUSWe2iMxj7TUldbGhJFS9aUv6Dv61JKYpykaElpSiK\nq9GSUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRX4+9/uxfAeDj976DdjAdK\nB+OpUaekg5w/p+zfop86erCEg1wY/vJ/RFGiMylFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVo\nSSmK4mq0pBRFcTVaUoqiuBq9mTPAGDn1NdK/EzweD4N6PkLda/JEll8uXk7KO6kElylDh7at6HbH\nraSlf0vfF0ZxdfXLAah1ZXWG9H7Md3mTXyP9ux9s3l6Pnpn37Zy8LenWqSNpa9efmbfP4z7LCzBy\n2uukf/cjHg8M6vEwda+pmZd5SRops9+3ma9rQbfbb8ndln3sGB279yap2110uul6n2b2Z7SkAogV\n6d+yZdsOUqeOZdOvWxk4ZhKpU8cCcPLkSYZPfpV5MyZSsUJ5Hnn2hVwle5P6dZj84oCSyzttnM07\neiKp08bl5Z00nXmvTXLyPk+7Fs3z8g4b6PO8ACvWbWDLtt9JTR5tM7+STGry6LzMU2Ywb/p4m3nA\nMNolNiOmchQAKe+8T3iF8iWS25/Ry70AYvmadbnFc9UVl5F18BCHDh8BIPNAFuXDQomoGE5QUBDN\nG9Zn6er0koxbQN7Drs4LsHzNetolNgOczIcKGeP4eixdvQ6AX37bxqZft9K6WaMSy+6vBORMyhhT\nCpgBXIkVdQ51PnqKyAbHsReFtRf3A8KwOqqrsB69E8BqEeljjHkBqAZcDlTFat0/9dJinQBWicjT\nxpgKwBwgFCsP7SUiK4wxPwOvAh2BskA7x/lXpGTs209cratzv46oGE7GvkzCQkOIqBjO4SNH2bJt\nB7Ex0aSlr6dpg7rExlRh069bSRo0nANZB+lx/z0kNo4v5FWKMm9mvrwVCsi7ndiYKqSt/dbJG23z\nDhzGgaxD9HjAd3kBMjIziat1VV7m8IIy54zxBprWt3/POHr6LIb0epQP5vuHAchNBGRJAfcCv4tI\nd2NMFLAA2HeWfeti1etlsMLPBiJyyBjzsWMxBogVkRuMMXWBtxxt+mCguYgcM8b80xiTiHXyzRSR\nD40xbYFngTux4/yDiLxijHkPuB74sHhOPQ9vXZnH42HUc30ZNGYSYaEhVIupwqlTp6geeyk9/n4P\nN1/Xgq07dnL/UwP57J0ZJWLa9bareTweRg14kkGjJxEWGkq1qk7eapfS4/57uPm6ljbvkwP5bHbJ\n5IXTtdUej4dRz/Zh0CtTnMzRnOIUH85fSIPahmpeynXl/AnUkk
oAWhpjWjhfl+Ps9uB1TtHEAT95\nCTsXYS3GAF8CiMi3xphYIA47s/rMGAMQDlwBbACGGGP6YWdMh71e5xvn323O/kVOdFQEGfsyc7/e\nvXcflSMr5X7dtEFdZk+26yfjXvsHsTFVqFI5kg5tWwJweWxVoiIqsXvPXqpVjSmOiKfnjcyfd++Z\neaeMsXlnvElsTDRVKkfRoW0rr7wVfZY3L/N+r8z7qBwZkZe5fh1mTxppM898m9gq0XyxZDlbf9/F\nouWr2Jmxl+AypYmpHEVCo/o+yezvBOqa1HHgJRFp43zUxFqJcyiTb1+wPxS93wkjGGs4hjPH6Tj2\ncjDn+PEiMgfoC2wXkRZAUr7nnPD6vFjecSOxcTzzv14KwMYffyY6MoKwkJDc7Y88+zx7M/dz5Gg2\ni5auIKFRfT7+fBGvp84D7OXX3sz9REdFFke8M/M2acj8r5Z45Y08Pe8z+fM24OPPF/L6e07evb7N\nC/nHeJMzxuXyMj83LC/zspUkNKrPhCH9mTttLKnJY+jcoR1J3e7SgroAAnUmlYZVn79rjInGlkcW\ndk1pA5Do/OvNj0BNY0x5Z72oNVa73g5oAYwxxtQDfsWq1K81xkSLyG5jzIvYNbAoYL1zvDs4++yt\nWGhY51rial1F1579CfJ4GNoniXmffkH50FDat2xOl1tupHv/oXg8Hh69twuVwsO5LrEp/UaMZcGS\nNP744wTP933CZ5dONu/VdO3RjyBPEEP7Ps68T76gfFgI7Vsm0OXWG+neb4jNe18XKlUM57rEZvQb\n/goLliznjxMneP5J3+UFaBh3jR3jXs8SFBTE0N6PMu/TLykfFkr7Fn+hyy3t6f7sCzbzPXdSKbyC\nz7IFKgGpWTfGlAamA7WBUsAL2NnQBOAnYBN2jWoRdjG9s/O8nMXwk8BiERngLJxfDVQAagB9ReRL\nZ9+B2BnaWqAX0Bh4C9gKJAMTgeHA80AdZ61rLLBBRN4s7ByqX1bNvzTrwOYVC0s4yflTo2kbADYv\n+6Jkg1wgnmrXXnTvexeQJVWUOCW1R0SSffm6WlLFi5aU/xCoa1KKogQIgbomVWSIyAslnUFRLmZ0\nJqUoiqvRklIUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI3egqAUEafgj2Pn3s01OPdElvbpXy4p\n/wM6k1IUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6MlpSiKq9GbOQMMv9Os\np8wi/fsfbd4nHqKuyfPwfbl0BSmz59q8bRLpdnuH3G3Zx47R8ZEnSbqvM51ubOuzvAAjk2eQvjFH\nDf8Yda/1HuNlpLz1HsHBZejQtjXdOnUE4OPPFzLz3bmUKlWK3g91o03zpj7N7M9oSRWAMeYmoIaI\npJxjv1uBzsBzwIsi4rvv7gLwO836uo1s2f47qZNHsunXbQwcN5XUySPz8ibPZN60V2zegSMcZbk1\nw6TMnkt4+TDfZ07/li3btpOaMp5NW36zaviU8XmZJ6Ywb+YUm/mZobRr0ZyyZYNJfnM2/3ptMkeO\nZjPljXe0pC4ALakCEJFPL3D/nUCJFhScXbMeFhpymgIcyNWWx8aUnLBy+dr1tEto6uStlqsst3kP\nUj7UK298PZauWUenG9taZflv20pEWb58dTrtWjS3matfni9zQWO8lkvKBpPQKJ6wkBDCQkIY3r+3\nz3P7M1pSBWCMeQC4FaiMNcvUB9aKyMM5FmOsbWaTs391YK6INDbG3Ic1x/wJbBSRR53jtXCOZ4BX\nROT1os7td5r1zP35lOXhZGTud/JW4PBRr7zrNtC0XhwAo1/9B0N6PswHny/ySc7TMu/LJM7rkjQi\nPJyMffvyxviotxp+PU3j6wFw9Ngxkga8SNbBQ/R88D6aN2rg8+z+ipZU4TQC7gZ2A9uMMRWBIcAL\nIvKRMaagy8FQ4CYR2W+M+d
opNbA69wSgJvAeUOQllR//06zny9u/F4PGTXPyOsryzxe5Sll+inyZ\nBzzNoFETCQsLoVrVmNxz2n8gi+QRQ9ixazf3932OBf98E4/nohO//E9oSRXOz86lHMaYHVg9em1g\nqbN9EXBzvufsAz5y9OvXAjl63WUi8qcxRjXrOXkLUpZHeOWtH8fsCSNs3tffcZTlaXnK8j17CS5T\nhpjKkSQ09I0R+Iwx3pNPs96gLrOTX7GZZ8wiNqYK2ceOEV/nWkqXLsXlsVUJDSnHvv0HiKxU0SeZ\n/R29BaFwTuT72uN8FKhfN8YEA1OBu0WkNdakXNCxVLMOJDaqz/xvltm8P/1yprJ84Aj2Zh6weZev\nIqFhPSYMfpq5U8eQOmUUnW9uR9J9nX1WUOCo4Rcttpl//JnoqHxj3H/IGWr4Fk0akrZmHSdPniTz\nQBZHjh5Vs/EFoDOpC0ewpuLPgOvybSsPnBCRncaYy5z9fPaGRX6nWY+7hriaV9K1z0Cbt9cjzPts\ngc3bohldbm5H9+eG2bxdO7niG7thndrEmZp0feJpgoI8DO37BPM++dxmbpVAl4430b3fYDzAo/fd\nRSVnEf2GNi24O+kpAAb3SSIoSOcH54sajAvAa+G8uog0dh5bhb3dIAKYBWwHfgHCsBr3nIXzN4E4\nYB3wHdAdq1s3ItLPGBOG1axXLyyD/xmMT7F5yWclHeW8qZF4EwCbV35VwkkuDE/MVRfdQpaWlEvR\nkipetKT8B51zKoriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRXoyWlKIqr0ZJSFMXV6J/F\nuBm/+it5D5QNLekQ588p588vsw+VbA7lnOhMSlEUV6MlpSiKq9GSUhTF1WhJKYriarSkFEVxNVpS\niqK4Gi0pRVFcjZaUoiiuRm/mDDBGJr9G+nc5CvBHz9Ssv52jWW9Jt04dSVu7/kzNep/HfZd3yquk\nf/c9HjwM6v04da81eXm/WUbK23OcvG3odudfAfh4/gJmvvu+VZZ3/xttmjfzWV6AkdPfJP2Hn2zm\npAfyqeFXkvLuPILLlKZD60S63XZT7rbsY8fp+NjTJN17J51uaOPTzP5MQJfU+erSA4Vczfq0cVaz\nPnoiqdPGAY4CfNJ05r02ydGsP59r4m1Svw6Thw0sgbzrHWX5REdZPp7UlIl5eSdOZd7ryVSsUIFH\n+g+mXcvmlC1b1irLZ07hyJGjTJn1jk9LasX679iyfSepE19i02/bGDg+hdSJL+VlnvoG86aOpmKF\nMB4ZPJJ2CU3y1PBz/lUianh/J6BL6kJ16f7OmZr1w+ehWY8uubyr02nX0ktZfvAQhw4fJiw01Mkb\nRkRF66Zr3ijeKsuDy5LQ2FtZ3se3mdd+S7uEJjbz5dVOH+Osg5QPs/ZlgOYN6rB07bd0uqENv/y2\n3arhm/rGDh1IBHRJOdaXOti1t6bAJcB0EZnpWF0OAdcAUcCDIrLWGDP+LPvuwBqNLwfuE5E1xpge\nwL1YD9+HIjLOGBMPTAOOOR93Y5Xrs4BK2DHvJSLri/p8M/Zl5tOsVyhAs56jAP/W0axHW836wGEc\nyDpEjwd8qFnfl0lcrZpeeXO08KGOsvwIW7ZuJ7ZqFdLWrqNpA0dZnp1N0nPPk3XoED0f7EbzRr77\nxs/I3E9czSvzModXyFPDh1fg8NFstmz/ndgqlUlbt5Gm9WoDMPq1txjSo3uJqOH9nYAuKS+2iMhT\nxphywCZgpvN4aRFpZ4zpCAw1xtxTyL5lReRGY8zjwN+NMZlYxVULZ/sSY8z7wIPANBF52xjTFogB\nugCfOoVXG5gEtC/uk/YWAVkF+JMMGj2JsNBQqlV1NOvVLqXH/fdw83UtrWb9yYF8NtslmvWB/Rg0\neryT10tZnnWQ5BFD2bFrF/f3eZYF779VYsryMzTr/XowaFyKlxoePvz8KxpcW4tqJThr9Wcu
lpKK\nMMYsBY4Dlb0e/8L5dxkwWkSyjTFn2/cb599tQDPsbKsmsNB5vDxQHfgISDHG1AJSReQHY0wCUNkY\n083ZN095W4RYbbm3Zn3vmZr1KWMAGDfjTWJjoqlSOYoObVsBOZr1ij7WrO/Ly3uGsrwes5Ptmtq4\nV98gtmoVso8d91KWX+pzZXl0ZCUyMr3V8Jmnq+Hr1Wb2+GE28xtziK1SmS+WrGDrzt0sSluTp4aP\niiChYT2fZPZ3LoZbEBoBbYHWItIGewmWQ875e4BTxpjWheybX5N+HPiPiLRxPuqKyNci8iXQBPgB\n+Icx5jpn315e+zYt+tN0FOBfLQFyNOuRpyvAn8mvWW/Ax58v5PX3HM36Xh9r1ps0Yv5XjrJcfipA\nWT7YK28aCY3iC1CWZ/vUbJzYsD7zv1luM//0C9GRlU5Xww96mb37D3AkO5tFy1eTEF+XCYOeZO6U\nkaROeonON7Ul6d47taAugIthJlUdWCoifxhj/gqUMsbkqM9bAv8EmmNtw1HA1rPsm5/VwGhjTAhw\nFGspfg5rLP6PiMw2xniAeCANuB1Y5lzu3SQi44v6RK1m/Wq69uhHkCeIoX0fZ94nX1A+LIT2LRPo\ncuuNdO83xGrL7+tCpYrhXJfYjH7DX2HBkuX8ceIEzz/pQ8163drE1apJ16QnrbL8yZ7M+2S+oyxP\npMutN9H96YFO3rvzlOWtW3D3430B3yvLG8YZq4bvO9hm7tGdefMXUT40hPaJTely8/V0HzDCUcPf\n7go1vL8T0AZjZ+G8PpCILZIPgQQgCygF/AFUBS4DugG/AZ+fZd+5IvJvY8ytQGcRecAY8wTwEHZh\n/EMRGenc9jACOICdiT0IHAHeBKKdY/UWkVWFZa9+WbVTm1csLGwX11Cj6XUAbF71zTn2dA81GiUC\nsHnxJyWc5MLwVK/vT++EWCQE+kwqGDiW7/JqAoDzG7uPROTf+Z5zxr7eOPv/2/l8GvY3ed7bPwUK\nuvXhzgsNryhKAK9JGWOaA8+StziuKIofErAzKRFZBlxVyPYHfJdGUZT/lYCdSSmKEhhoSSmK4mq0\npBRFcTVaUoqiuBotKUVRXI2WlKIoriZgb0FQfM0pOHa4pEOcPx7n53O58iWbQzknOpNSFMXVaEkp\niuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRmzkDDL/TrKfMIv37H23eJx7K\npyxfQcrsuTZvm0S63d4hd1v2sWN0fORJku7rTKcb2/osL+So4X9w1PCPFaCGf9cZ49b51PBzvdTw\nxeLiCEguqpIyxrQBeopIZ6/HJgKTRGRzMb3mRyJyW3EcOz9+p1lft5Et238ndfJINv26jYHjppI6\neWRe3uSZzJv2is07cATtEpvlKctnzy0RZblVw+8gNWWCo4afQGrKhLzME6cx7/Upjhp+iJcafg7/\nmjmZI0eymTLrbS2pC+Civ9wTkb7FVVDO8X1SUHB2zTpwmmY9KCgoV7Nekixfu552Cfab9aorqpF1\n6JBX3oOUD/XKG1+PpWvWAfDLb9ussrxZI99nPosa3mbOU8MHBQXRvFEDlq5OZ9mqtSQ0bkBYSAjR\nURE+V8P7OxfVTMohzBjzDtYi8z7Ws9cTKMOZevS+QDWsWr0q0F9EPjXGPI21FwcB/xWRF40xLwDh\ngMG+bXFfEfnEGLNHRKK89OsnsYqt/kV9Yn6nWc/cT1ytvHd4jggPz1OWV6zA4aNH2bJtB7Ex0aSt\n20DTenEAjH71Hwzp+XCJKMsvTA2/nqYN6gJwNPsYSc+94Kjh7/OpGt7fuRhLqjZwDbZgNgMbnccL\n0qMDxIrIDcaY0qfFKAAAIABJREFUusBb5JlgWmAL5xdjTI5V5jIR6eBorR4HvH1Jk4HHRGS9MeYt\nY8wVIvJrcZ0kBIBmvX8vBo2b5qUsP8WHny+iQW1DtapV
fJ6vIM5Uwz/NoNETvMbYbtufleWlhn+O\nBe//o8TU8P7GxVhSa0TkCIAj78yhID06wJcAIvKtMSbW2fcI8BXWahwF5LjBFzv/bsPOqrwxIrLe\nOdbfi/aULP6pWfdWlu87XVleP47ZE0bYvK+/Q2yVaL5YksbW33exaPmqPGV55UgSGtYv9rw2c+Tp\nY1ygGn6szfzqLGKrRjtq+Nolpob3dy7GNakTBT14Fj065BsjY8wVwFNYC3EbwHs2lF/F7s3J/0Pm\n88LvNOuN6jP/m2U270+/EB0ZcbqyfOAI9mYesHmXryKhYT0mDH6auVPHkDplFJ1vbkfSfZ19VlCQ\nM8Y5avifC1DDD3GdGt7fuRhnUgVijOnJmXp0sJd1Y4wx9bCFFAXsFpFDxpiGwBVYCem5+M4Y00xE\n0owxrwNjReT7ojwHv9Osx11jleV9BhLk8TC01yPM+2yB1ay3aEaXm9vR/blhjrK8kyu+sa0a/mq6\nJj3lqOF7MO+Tz61mPVcNPwiPBx697658avgnAd+r4f2dgNas5yf/LQjGmD3ABuzCeTXO1KMnAVcD\nFYAa2IX0RcB/gTDs5V0poIHz+R4RSTbG1AGSRaSN18J5XSDFibJcRPoVltX/NOun2Lzks5KOct7U\nSLwJgM2rvi7hJBeGp8qVF91C1kVVUheK8xu7PSKS7OvX1pIqXrSk/AedcyqK4mp0TaoQROSFks6g\nKBc7OpNSFMXVaEkpiuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5Gb0FQiggPBF9S0iEuAOcm5uPZ\nJRtDOSc6k1IUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6MlpSiKq9GbOQMM\nv9OsJ88gfaPg8cCgXo9R91rvvMtIeSuV4OAydGjbim6dOgLw8ecLmfnuvyhVKojeD3XzuQ145LQ3\nSP9eHDV8d+pek+fh+3JJmqOGL02H61qeqYZ/uA9J3e7yuRren9GS+j9gjBmLfY/0nUANEUk5x1OK\nFb/TrOfkTRnnKMsnkZrilXfidObNnGzzPmPzli0bbJXlr03iyNFsprzxjk9LasW6DWzZvoPUKaPt\nGI9NJnXK6LzMya8xL2WczTxgOO0SmxJTOQqAlNnvE16+vM+yBgpaUkWAiHx67r2Kn7Np1sNCQ07T\nrAO5mvXYmOiSy7s6Pbcor6p+ea5m/ex513JJ2bIkNLLK8rCQEIb37+3bzGvX0y6xmc18xWVkHco3\nxqHemeuxdM16Ot3Y1qrhfy0ZNby/E9AlZYwpA/wDq53KBh4CpgKhQAjQS0RWGGN+Bl4FOgJlgXZY\nb96cAvbtBjyLFYAeBTYYYx4A6ohIP2PMeKApcAkwXURmGmPeBHYAjbDK9vtEZE1Rn6/fadb3ZRJn\nvPKGh5+e96h33vU0jXeU5ceOkTTgRbIOHqbng/fSvFEDn+S1mfcTV9NbDV+BjMz8mR01fPq3NK1f\nB4DR099kSK9H+GC+f8g13ERAlxRwP7BTRO41xnQFbgdmisiHjkr9WeBO7Dj8ICKvGGPeA64Hvsu/\nrzGmM/Ay0BjIBFZ7v5gx5hJgi4g8ZYwpB2wCZjqby4rIjcaYx4G/A0VeUvnxO806+ZTlA55i0KhJ\nhIWF5OYF2H/gIMkjBrNj127u7zuABf+cVWLK8jM068/0ZtDYZEcNbzN/OH+hq9Tw/kagl1RD8jTp\n7xljwoFkY0w/7IzpsNe+3zj/5ijSdwFD8u0bCRwUkd0Axpgl3i8mItnGmAhjzFLgOFD5LMdvVnSn\nmIffadajIk/XrO8pIG+yd94qjrL8WkdZXtXnyvLoyAgyMr3V8JlUjvDSrNevw+yJL9vMM98mNiaa\nLxbnV8OXJiYqkoRGvjMv+zOBfgvCn5x+jn2B7SLSAiv+9Ca/Ir2gfT2crkvPr2BvDbQFWjsK9mOF\nHL/I8TvNepN45i9a
nJc3Kl/e/kPPyNuiSXw+ZflRn5qNExs3YP7XOWr4TURHVjpdDT9gWF7m5StJ\naFifCUP6MXfaK6Qmj7Zq+G53aUFdAIE+k1qJLY33jTG3AoOBJ5xtd1C4Hj0KWJ9v371AuDGmInZm\nlQgsy/ecrSLyhzHmr0ApY8z5KNiLBL/TrNepTZy5mq5PPE1QUBBD+yY5yvJQ2rdKoEvHG+nebzAe\n8vIC3NCmBXcnPQ3A4D6P+1RZnquG7/2cVcP3ftRRw4fQvsVf6NKhPd2fe9FmvudOV6jh/Z2ANhg7\nBTETu3D+BzAcmA5sBZKBic5jz2MXvg953VawEXirgH09QB9gC3bhPOc3e3Wc7Z87j38IJABZWBX7\nXBH5t1OWnUXkgcKy+5/BGDavXFSyQS6AGk1aA7B56eclnOTC8FxW+6IzGAd0SfkzWlLFi5aU/xDo\na1KKovg5WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVoSSmK4moC/c9i/JsS+sv+\n/4lTJyH78Ln3cws5Y1s2pPD9lBJHZ1KKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVo\nSSmK4mq0pBRFcTVaUoqiuBq94zzAGJk8g/SNgscDg3o9Rt1ra+Vu+3LxMlLeSiU4uAwd2raiW6eO\nAHz8+UJmvvsvSpUKovdD3XyqLR85/U3Sf/gJDx4GJT1AXS9Z6JdLV5Ly7jyCy5SmQ+tEut12E0ez\njzFg7FT27D/A8eN/kHTvnVz3F99agUdOnk76dz/YMe6dRN1rTV7mb5aS8ta7BJcpQ4frW9PtztsA\n+Hj+AmbO+SelSpWid/e/0yahWKxmAYmW1FkwxtyLFTQ8LCLfeD0+EZgkIptLLNxZWJH+LVu27SA1\nZRybtvzGwNGTSE0ZB8DJkycZPnE682ZOpmKF8jzyzPO0a9GcsmWDSX5zDv96bRJHjmYz5Y13fFZS\nK9Z/x5btO0md+BKbftvGwPEppE58KS/v1DeYN3U0FSuE8cjgkbRLaMKajUKdWlfx8F23sX1XBg8N\nGOHTklqxdj1btm0ndfpEO8ajxpM6fWJe5olTmTdzKhXDK/BIv8G0a5lA2bJlSZ71Dv96PZkjR44y\n5Y23taQuAC2ps9MOeNa7oABEpG8J5Tkny1en065FcwCuqn45WYcOcejwEcJCQ8g8kEX5sFAiHC1U\n84b1Wbp6LZeULUtCowaEhYQQFhLC8P69fZd37be0S2hi815ejayDh/PyZh2kfFgIERWtEqp5gzos\nXfstnW5ok/v8nRl7iYmKKOjQxZd59VratUywmatfTtbBgxw6fJiw0FBnjMOIcESlzRs1YOmqtVxS\nNpiExvF5Y/yMa/8XciVaUoAxpgIwBwgFQoB5QAegiTEmE6vFWgPMB/4G9MSaiGcDFYADQFegIvC2\nc9gywP0isskY8zNWcZUI7AduERFvyWiRkLEvkzivy6WI8HAy9mUSFhpCRMVwDh89ypZt24mNqULa\n2vU0ja8LwNFjx0ga8CJZBw/T88F7ad6oQVFHKzhv5n7ial7plbcCGZn7bd7wChw+ms2W7b8TW6Uy\naes20rRe7dx9u/YdzK49e5k+7DmfZM3NvC+TOFMzL3PFcDL2ZhIWan8AHD5ylC1btxNbtQppa9fR\nNL4eAEezj5H03PNkHTxIzwf/RvPG8T7N7c9oSVligJki8qExpi3QA+vTmysiXxljrgRuF5GNxpi/\nOc/pB3wmIpONMU9iZ17bgWEistAY8xBWRPo0cCXwloj0M8YsB+oB6cV9UqfI05V5PB5GDXiKQaMm\nERYWQrWqVcjRme0/cJDkEYPZsWs39/cdwIJ/zsJTAu/AcEbefj0YNC6FsNAQqsVE4y1fe2/iCL7f\ntIX+Y6bwUcorJZIXwNsI5/F4GDWwH4NGjSMsNJRqVWNyt+/PyiL5pefZsWsX9/d+hg
Vz3y6xzP6G\nlpRlFzDEGNMPKIu1Ex/02n5YRDbme05DYAiAiEwAMMZcBkw2xrwIVAJWO/tmiUiODXkbEF4cJxEd\nFUnGvv25X+/es5fKkZVyv27aoC6zk8cAMG7Gm8TGVCH72HHi61xL6dKluDy2KqEh5di3/wCRziVL\ncRIdWYmMTK+8ezOpHOGVt15tZo8fZvO+MYfYKpXZ8NMvRIZXoGp0FNdeVZ0///yTfQeyiKxYLEN6\nZuaoSDL2ZeZl3rOXyl6XnE3j6zF76nibefobzhgfI75ObWeML/XpGAcCeguCpS+wXURaAEkFbD9e\nwGN/cub4DcPOrloBL3o9fiLffsXyIzSxSTzzFy0GYOOPPxMdFUlYSN77JT3Sfyh7M/dz5Gg2i5au\nIKFRA1o0iSdtzTpOnjxJ5oEsjhw96jM1eGLD+sz/ZrnN+9MvREdWIiykXF7eQS+zd/8BjmRns2j5\nahLi67Lq2++Y9a9/A7DHOZdKFcr7JC9AYpOGzF9klyk3yk9njnG/QV5jvJyExvG0aNqItDXpXmOc\nrfr1C0BnUpYoIGemcwcQfB7PWQm0BVYaYx4Dsp3jbDLGeIDbsHp1n9GwTm3izNV0feJpgoKCGNo3\niXmffE750FDat0qgS8cb6d5vMB48PHpfFyo5s48b2rTg7qSnARjc53GCgnzzs6thnCGu5pV07TuY\noCAPQ3t0Z978RZQPDaF9YlO63Hw93QeMwOPx8GjX26kUXoGut9zAoPEp3PfUULKPH2dIz+4+ywvQ\nsG4ccaYmXZP6EuQJYuhTPZj33/mUDwulfatEunS8me5PDbCZu3X1GuOW3P14HwAG933Cp5n9HdWs\nA8aYJsBbwFYgGZiILZgeIvJvY8weEYly9l2EXTjf6jwnHHtpeC/QGhgLbAGmADOAB4E5Xs+fCySL\nyKLCMlW/rNopf9GW12jSBk6dZPPiT0o6ynlTo9UtAGxetbiEk1wYnujqF91ClpaUS9GSKl60pPwH\nnXMqiuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdXon8UoRUOp0ngq\nX17SKc6fo4cBOPn5uyUc5MIodd+Ako7gc3QmpSiKq9GSUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0p\nRVFcjZaUoiiuRktKURRXozdzBhj+pln35sefN/HEU8/wwL1d6da1y2nblqatYHzydEoFBdGqRQI9\nHnmoRDICjPpsBeu2ZeDxwIAbm1E3Nip325yV3/Px+l8oFeQh7tJIBtyYZyrec+got077gMl3XUfT\n6lVLIrpfojOpQjDGvGmMubWIj9nTGPNCUR4zB2/N+kvP9GHE5Fdzt+Vo1meMeZF3Jo9m4dIV7Ny9\nh8wDWSS/OYfZyWOYPuoFvly8vDiinZMjR48yfMw4mjdpXOD2EWPGM+WVkbw7awZLlqXx8y8lY7lf\nuWUnv+7L4t3utzC8YyIvf5qWu+3QseO8sXQDbz94M+882IFNGQdYt2137vaxX6yiWiXfmW0CBS2p\nAOJsmnXgNM16UFBQrmZ92er0XM16dGSETzXr3gSXKcNrk8cTXTnqjG1bt20nPLwCVWOqEBQUROsW\nCSxbsbIEUsLyzb9zvbF//nNV5YpkZR/j0DFrPCtTKogypUpx5PgfnDh5kuw/ThBermzu80KDy1Ar\nWl17F0rAXe4ZYx4AbsLqz6sBE7DevF5YV95GEXnU2e9m4FKsIv1poClwCTBdRGZ6HTMNuNdRplcD\nPsKq1+92dqmJtcyMwRpirsRq1oeKyAJjzPVYA81O4Hfgl+I4d3/TrHtTunRpSpcu+H/HjL17iaiU\nJw2NiKjE1q3bfRXtNPYcPkrtSyNzv64Ucgl7Dh0lrGwwZUuX5onW9blh8r+4pExpbo6rQfXIcI7/\n+SfTvkonuWtbRn22okRy+zOBOpOKA/6K9eKNAE
KBm0QkEbjGGFPX2e9yoBWwF9jiyEFbYiWf3rxN\nXiH9FXhXRFJEpA3QDdgNpGC1Vr+LyHXA7dhiAhgJdBOR9lg3n084m2a95+ARuZr1U6dOsf/AQaYM\nH8zIAU8ycNREXG8Qcmm+Q8eOM2Pxt3zSsxPze9/Jt9sz+GHnPmYu/pbODWtR4ZKyJR3RLwm4mZTD\nVyJyAthjjMkE9gMfGWMArgVyfhSuFJFTQLYxJsIYsxQ766qc73jvAp8BLwO3Ao8AGGOCgH8AvUVk\nvzEmAWhpjGnhPK+cMSYYqC4i63KyAeUoBvxNs36+RFeOYs+evblf79qdUeBloS+oHFaOPYeO5n69\n++ARKodZg/GmjANcVjGMSiGXANDw8ips/H0vSzbt4M9TJ5mz8nu2Zh5k/fY9TOjchprRlQp8DeV0\nAnUm5X1epbAlc7eItAbSvLYdBzDGtMbOulo7s6Nj3gcTkb3ANkciGiQiOdcaA4AlIvKN1/FeEpE2\nzkdNETkOnDxLtiLF3zTr50u1Sy/l0OHDbNuxgxMnTrDwmyUkNm927icWA4lXxTL/uy0AfPf7XqLL\nhxBatgwAsRXD2LTnANl/nABg4449XBFRgdkPdeC97rfyXvdbaV2zGkM7/EUL6gII1JlUc2NMKaAS\ndl1qt4jsNMZcBjTmTI16FLBVRP4wxvwVKOXMgLx5G5iKXXPCGNMMuAFbbjmkYfXq7xpjooG+IjIQ\n2G7sNO5HoA2wrOhONQ9/06x7s+G7Hxg9YRLbd/xO6dKl+ezLBbRt3ZJql15K+7ZteGHAMzw9YCgA\nHW5oR40rSua9q+IviyauaiT3vvEfgjweBt/8Fz5I/4nylwTT7poreCihDg+89Smlg4JoUC2axldU\nKZGcgUTAGYydBfHbgFPA1cArwPXYdap1wHdAd+x6kRGRfsaYcOBz4CjwIZAAZGFnYXMd1XowduH7\nSufS7jNsAWY4L70YeAGYDtR2nvuCiHxijLnJyfGrc4xtIvJCYefhdwbjoCC2fLfunPu6herVrwRg\n08RnSjjJhVHqvgEXncE4UGdSm0Skn9fXb+fbPt77CxE5gP3NXg4TCjhmIvCxiOx3nnPjWV774fwP\niMinwKfnCq0oypkEakkVKcaYF4EbgTtLOouiXGwEXEmJyJvFcMzngeeL+riKopybQP3tnqIoAYKW\nlKIorkZLSlEUV6MlpSiKq9GSUhTF1WhJKYriagLuFoSAwuNHP0NO/smpfb+XdIrz55JQAIKuv6uE\ngyjnwo++CxRFuRjRklIUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6M3cwYY\nI6e8Svp3P+DBw6Dej1H3WpO77ctvlpHy9rsElylDh7at6XbnXwH4eP4CZr47l1KlStG7+998qlkf\nOe0N0r8XPB4Pg57oTt1raublXZJGyuy5BJcpTYfrWtLt9g6527KPHaPjw31I6nYXnW5sW9Chiy9z\n8gzSN/5gMxeosn/PUdm3zqeyd8a4BFX2/shFP5MyxrQxxswt4PGJxpgaJZHpf2VF+npHsz6Bl57t\ny4jJ03O3Wc36NGaMGcY7U15h4dI0du7OyNOsTx3L9FEv8uXiYnFEFJx33Qa2bN9B6pTRvPR0D0ZM\nzfWx2rzJrzHj5cG8M+ElFi5byc6MPbnbU2a/T3h53yvLrcp+O6kp4x2Vff4xTrFjPHmMM8Y5KvvZ\nzE5+pURV9v6KzqTOgoj0LekMF8ry1em0a+mlWT94iEOHDxMWGupo1sOIqGh9es0bNWDp6nQuCQ4m\nobHVrIeFhDC8fx/f5V27nnaJVk111RWXkXXoMIcOHyEsNMTmDbVaeIDmDeuxdM16Ot3Yll9+28am\nX7fRulkjn2XNzXwWlX1u5jDvzFZlf0nZYBIaxXuNccmo7P2Vi66kjDFlsELPK4Bs4A0gzBjzDlAf\neF9EhhljFg
E9gc5AOGCAq7Caqk+MMZ2wavYTwCoRedoYcznwDlbnXhprN95GAer14ji3jH2ZxNXK\nu1yKqJijWQ91NOtH2LJ1O7FVHc16A0eznn2MpOdeIOvQIXo+eB/NG8UXR7wC8u4nruZVeXnDK5CR\nmV8Lv4PYmGjS0r+laf06AIye/iZDej3CB/MX+iTn6ZkLUtnvK0RlXw/wVtnnjLHvVfb+ykVXUsD9\nwE4RudcY0xXr5qsNXIO9/N3MmZr1y0Skg6OmetwY8w0wGGguIseMMf80xiQCzYDPRWS4MaYhUBWr\ncf9dRLobY6KABUA9X5yot67M4/EwauDTDBo9gbDQUEezbrftz8oiecRQduzaxf19nmPB+//A4/G9\nOemMvM/0ZtDYZMJCQ6gWY7XwH85fSIPahmpV3eGzO1Nl/zSDRk0kLCyEalVjcs9p/4EskkcMYceu\n3dzf9zkW/PPNEhljf+RiLKmGwJcAIvKeMaYNsEZEjgAYYwr6P2ex8+827KwqDrgc+MxRt4djZ2bz\ngQ+MMRWxvr5lxpj7KUC97piNi5ToyEgy9mXmfr17zz4qR0bkft20QT1mJ48FYNyrs4itGu1o1ms7\nmvVLfapZj46MICPTSwu/N5PKEV5569dh9sSXbd6ZbxMbE80Xi9PY+vsuFi1fxc49ewkuU5qYqEgS\nGtUv9rwA0VER5xjjusxOfsVmnjHLUdkfc73K3s1cjAvnf3LmeZ84x3O8t3uwOvXVXjr1eBGZIyIb\nsJeM3wAjjTF/5+zq9SInsUlD5n/laNblZ6KjIvJp1od4adbTSGgUT4smDfNp1rN9pllPbNyA+V/b\nhfqNP20iOrISYSHl8vIOGJaXd/lKEhrWZ8KQfsyd9gqpyaPpfHM7krrd5bOCAmeMT1PZFzbGOSr7\n/GPsPpW9m7kYZ1IrsWr0940xt/K/XXoJcK0xJlpEdjtevhlAS+AXEfnQGLMHuAtYTsHq9SKnYd3a\nxNW6mq5JTxEU5GHokz0czXoI7Vsl0uXWm+j+9CA8Hnj0vrvyNOutW3D3408CMLhPks806w3jriGu\n5pV07f0cQR4PQ3s/yrzPFti8Lf5Clw7t6f7ci1YLf8+drvjGtir7mo7K3sPQvk/kU9nf5Kjs841x\nmxbcnfQU4NsxDgQCTrN+Lhxd+kzs5dkfwCzgDhHp7GzfIyJR+RbO94hIsjGmDpAsIm2chfOBwDFg\nLdALiMdq1g9hZ2y9gZ8oQL1+rpzVL6t2avOqr4vuxIuRGo1bAafYvPTzko5y3tRIuAGAzSu/KuEk\nF4Yn5qqLbiHroispf0FLqnjRkvIfdM6pKIqr0ZJSFMXVaEkpiuJqtKQURXE1WlKKorgaLSlFUVyN\nlpSiKK5GS0pRFFdzMf5ZjFIseKCUH/7vdPLPkk6gnAOdSSmK4mq0pBRFcTVaUoqiuBo/XERQFMXt\nOO8Y8hEwQUSS821rB7yMfaeQ/4rI8MKOpTMpRVGKFGNMKDAF5x1wC2AycCeQCNxgjKld2PG0pBRF\nKWqOAR2AHfk3GGOuBPaJyFYROQn8F7i+sIPp5Z6iBBJHDhT/G8SFhBf6nlYicgI44bz/f35igAyv\nr3djLUxnRWdSiqKUJOd8Ez+dSQUYfqdZn/oa6d85mvWej1D3Gm9l+XJS3kl18rai2x23kpb+LX1f\nGMXV1S8HoNaV1RnS+zGf5XV9Zve/0+4O7Gwqh1gKuCz0RkuqCDDGvAc8KCJHSzKHt2Z905bfGDh6\nAqkpE4A8zfq816dQsUIFHuk/hHYtm1O2bFmS35zDv2ZO5siRbKbMettnJWWV5TtInTqWTb9uZeCY\nSaROHZuXd/KrzJsxkYoVyvPIsy/QrsVfAGhSvw6TXxzgk4yBkNlNiMgWY0wFY0x1rCLuVuC+wp6j\nJVUEiEjXks4AfqhZX7Mu95v4qisuc/IWpixPJzamZKWg7s9c8jMpY0wjYBxQ
HfjDGNMZ+H/AZhH5\nAEgC3nV2TxWRHws7npZUIRhjKgBzgFAgBGuEuQ7oBJwEPhaRl40xW4A62AXAqVgLzUmgC1ABq3Xf\nhHXyrRWRh4sjr19q1mt5Kctz8zrK8iPemnWbNzamCpt+3UrSoOEcyDpIj/vvIbGxb/L6a2ZfIyKr\ngTaFbP8aaH6+x9OSKpwYYKbj0WsLPIvVplfF3oj2eL79o4FeIrLWGDMMO439GGgE3I39TcY2Y0xF\nEdlPMeP3mvXn+jJozKTTNOvVYy+lx9/v4ebrWrB1x07uf2ogn70zg+AyZXye15WZ3b8mdcFoSRXO\nLmCIMaYfUBY4DMwFvsDOsGYXsP9oY0wIcKnX9p9FZCeAMWYHVste5CXld5r1/MryvfuoHFnJK29d\nZk8ebfO+9g9iY6pQpXIkHdq2BODy2KpERVRi9569VKsagy/wx8z+jt6CUDh9ge0i0gJ7HY2IJGFn\nUDHAImOMd9FPAiaJSGvgVa/H82vci2Wa4n+a9Xjmf73U5v3xZ6Ij8+V99vl8yvL6fPz5Il5PnQfY\ny9u9mfuJjor0SV6/yHzqVPF/+BidSRVOFLDe+fwOINwYM1REhgHDjDGtsGtO3vtvMsaUxd5xu9yX\nYf1Os17nWuJqXUXXnv2tZr1PEvM+/cIqy1s2p8stN9K9/1A8Hg+P3tuFSuHhXJfYlH4jxrJgSRp/\n/HGC5/s+4dNLPX/M7O+owbgQjDFNgLeArUAyMBGoDHyPVakvFZHBXgvn9wJ9sIvks5zndABeF5HG\nzjFXAZ1FZEthr+1/BmPYnLaghJOcPzWatQX8KzOA59Jahc/CD+4p/m/o8lE+XbDUmVQhiMhK4Fqv\nh/7fWfar7nw6w/nI4QPn38Ze+zZGUZTzRktKUQKJALwy0oVzRVFcjc6kFCWQ0JmUoiiKb9GZlKIE\nFDqTUhRF8Sk6k1KUQCLwJlI6k1IUxd3oTEopIk75p7LcH9XwhaG/3VMURfEtAfZjRFEudnQmpSiK\n4lN0JqUogYSuSSmKovgWnUkpSiChMylFURTfojMpRQkoAm8mpSUVYPidZn3a66R/9yMeDwzq8TB1\nr8nzBn5ZhEcEAAAgAElEQVS5JI2U2e/bvNe1oNvtt+Ruyz52jI7de5PU7S463XS9z/KC/42xv6OX\nexeIMWaiMaZGSecoCG/N+kvP9mXE5Om523I06zPGDOOdKa+wcGkaO3dnkHkgi+Q35zB76limj3qR\nLxcv813edRvYsu13UpNH81K/noxInnl63ikzmPHyEN6Z8BILl61kZ8ae3O0p77xPeIXyPsuam9nt\nY6y2GEVE+pZ0hrPhf5r19bRLbGbzXnEZWYcKUZbH12Pp6nV0uul6fvltG5t+3UrrZo18ljU3s8vH\n2BdiFV9rYwO2pM6iSH8XeA3oDPwMrMaq0H8SkfuMMZcCrwPBWEPxwyLymzHmJ2ANMB/4G9AT2IaV\nf1YADgBdgYrA206EMsD9IrLJGPMz8CGQiJWC3iIiJ4v6nP1Os56ZSVytq/LyhlcoRFm+gab16wAw\nevoshvR6lA/m+9704m9jHAgE8uVejiL9OmAAVpFeCls2TbCFsUVEmgItjTEVgeHAOBG5HquvGuIc\n60pgmIi87nX8fsBnItIS+BJoh9WvD3Ne8w3gCa/nvyUizYFKQL1iOufTOJtmveeg4bma9VOnrGZ9\nyoghjBzwFANHTvDJT+MC83p97vF4GPVsHwa9MoWeQ0dRrWo0pzjFh/MX0qC2oVrVKiWSMT/uG+NT\nPvjwLQE7k6JgRTrAChE5ZYzZBax1HtuNVZ8nAMYYMxhbaBnO9sMisjHf8RvilJiITMA+8TJgsjHm\nRWwZrXb2zRKRHMnoNue1ihy/06xHRpCxL882b5XlXnnr12H2pJE278y3ia0SzRdLlrP1910sWr6K\nnRl7CS5TmpjKUSQ0ql/seW1m/xrjQCCQ
Z1JnKNIdTpzlcw9wHOgiIm1EpKWIdHK2HS/g+H9y5vgN\nw86uWgEvnuV1cl6ryPFvzfomR1leLi/vc8Py8i5bSUKj+kwY0p+508aSmjyGzh3akdTtLp8VFPjB\nGOvCuV+RX5EefB7PSQNuB1KMMW2BGBGZc5Z9VwJtgZXGmMeAbPI06x7gNuxszGf4nWY97hqrLO/1\nLEFBQQzt/SjzPv2S8mGhtG/xF7rc0p7uz75gleX33Omz8iw0s5+NcSAQsJr1syjSSwFxInLIW3ee\n8zl2xjQLKIe9+H5ARDYbY/aISJRz3EXYhfOtzvHDgYNYxXprYCywBZiCtRk/CMzxev5cIFlEFhWW\n3/806/+fvfMOj6Jq+/C9oUkKqYQShFAfIBRDibRQJOIrlvdVQUGwC0oTEFBMpEgvglQBERQFBUXw\ns4MiiFIFQhDQoyIdhTSSQACV5PvjTJJNCBA0u5ss576uXNnZmZ397YGcfebMzLkzObjlK1dHKTDV\nW94KQHFp4yxsFWpcsQrPPHnQ4X/QtgrVnXqCz207qeKO6aQci+mk/k0G53ZS7ny4ZzBch7hf0WEO\njA0GQ5HGVFIGgzvhhsM3ppIyGAxFGlNJGQxuhamkDAaDwamYSspgcCfMmJTBYDA4F1NJFWmcPXPP\nv6Q4adazmtbmZt/TppIyGAwG52IqKYPBrTCVlMFgMDgVU0kZDO6E+xVSppIyGAxFG1NJGQzuhDm7\nZzAYDM7FVFIGg1vhfpWU6aTcDK0A/9FSgD+djwL8HUsB3j6PAvx9OwX4zc7LO+8Ndv/4MzabjZi+\nj9NQauXk3bydectW6rztW9Pzf52z152/cIG7eg2mT48u3HvbLU7LCzBx1nytWbdBzDN98rTxZua9\nZWnWO7aj533/Baw2fuc9q40fpn0r57Vxcceph3si8qCIKBGJzPN8kVWX50VEKorIAlfnyA+tAD/O\ninkzGP/8YMbNmpe9TivA5/LalLEsnf0y6zdvtVOAL2PZ3GmWAnyr8/LG7ePQ8d9ZMWsi45/ty7i5\nOVrDjIwMxs55ndfGx7B0+ljWWwqrLOYtW4mvj7fTsmZnjrXaeP4Mxj//LONmXqaN57zM+k12mvU3\nlrLs1enMnzzGaNavEWdXUlHA80qpb+2fLMrq8rwopf4AnnJ1jvy4NgV4OJt3xnJD6TK0ahbuGs16\n7B6iWkXovNWq5NGsp+HjlUezviuOe2+7RWvWjxxzkWY9lqjIVjpzaFVS09IubWN/O836jlhuKFM6\ndxs/V2z+uxcJHNJJXUZx7gt0BpqLSDLwOg5Ul4tIFNpI/CeQDNyPln8ORHvwmgDjgf8A4cAwpdSH\nInIvMMTaZodSaoiIPArcDlQGhgMzlVLNRORWYALawbdcKTVDRHpYn/cisE8p1dt6fRugPCDA1Dw2\n5ELh2hTgcUTcpEXK586fp8/wUZYCvKcTNeun82jWfYlPPm1p1stx9pydZj1uLxGNwgCYvGAJI/o/\nyeovNzglZ67MScmESZ42TrRr4/Rzuds4PKuNL+g2Tkuj/2MP0bKZg9rYnN0rMJcozpVSXwJfAC8o\npb7B8epyf+BBpVQ7IBW4zXr+JqAn8DQwCa2cehp4VES8gReBW6zX3Sgira3XVQXaAscBLLfeq+iO\ntzUQJSJl0R3zf5RSrYG6ItLQen1D4F6012/ANbXmP+RSBfhQYiZPp3/MGKpUqkhmZiaZmZmcTk1j\n9riRTHxhCNETp7tOs54377ABxEx7lf6jp1CloqVZ/3JDEdOs5zzObuNJ0+gf/ZLVxvaa9ZFMjB5K\n9MRpLmvj4oijDvcupzi3x9Hq8njgdREpie7Qvkb78eKUUhdE5HfgZ6XUWUu57guEoTujNSKC9Vw1\na3/fW3r2rP2XB84rpbJU7HdaeZOA/7O2qwcEWuu3KKUuiogDNesBxCclZS/nrwCfBsC0BYsJqVTB\nUoDX
Kzqa9QD/nLyNw1j2yjidd9FSS7O+LUeznpBI6VKlqFg+kFZNnKRZD8qrWU+kfJBdG4c3Ytnc\n6Trz/MWEVKzA+QsXnKhZd7/Oz1GV1OUU5/Y4Wl2+GOhvVUT/d5nX5adZ32lp1tsrpcLtDMZ5816S\nVURKA3OBB6z33XaF9yp0WjdvaqcA/yUfBfiLRUuz3rQxa7/Vg8j7fvntUs169DgSk1N03q07aNWk\nEa+8OISVc6ewYvYkutweRZ8eXZzWQYGlWd+gh1R1GwfmbuOhMXZtvJVWzcJpE9GUbbt2u6SN3QFH\nVVL/RHEOhasu9wWOiIgf0MEuz5VQQD0RCVZKnbKqt9fy3VCpRBEpISIhwAngY+AR4G+l1B9WBdiM\ngn/2f41WgNemW5/BlgK8P6s+X4uPl5edAjxaa8t7PJBHAa4Hc52uWa9dg24Do/Gw2Rg5oBer1nyt\n87a5ma63R/HE8DE6b7d7i8QfdpOGYYRJbbr1GYSHzYORz/Zj1WdrtRq+bWu63nU7Tzz7gs7cs1tO\nG7eP5IGn9UmJFwf1dVwbu+FhpKM6qbeAt0SkK1px3l1EHivA62Zar9tAjro8Ea0sP2T9fk1EOhVg\nX3OBTcDPwBRgNBB9pRcopdJFZBDwmYhcAGLRHdDl6AustB6/Z3VcX4rI90Cc9b6voBXvTmHI04/n\nWq5bq0b2407t2tCpXZtLXtPtv3fQ7b93ODxbfgx58qFcy3VrhmY/7hTZgk6RLS772gEPP+CoWFdk\nyNNP5FquWytn8L8otnFxx2jWiyhas/7t1TcsAlRvFglkcnDTGldHKTDV2/wHgIM7vnNxkmvDFhx6\nZc36oTjHa9ZDGzt1ylhz757BYCjSmNtiDAZ3wg2PjEwlZTAYijSmkjIY3ImMjKtvU8wwlZTBYCjS\nmErKYHAnzJiUwWAwOBdTSRkM7kSmGZMyGAwGp2IqKUMhYYNSZVwdouBkjd38fcG1OQobMyZlMBgM\nzsVUUgaDO2HGpAwGg8G5mErKYHAjnDGriVOnQMBUUgaDoYhjKimDwZ0w9+4ZDAaDczGVlMHgTrjh\n2T3TSbkZE2cvYPf+H7FhI+aZp2lYL1vBxbpvtzDv7XcoXaoUnW9pT8/77gbg47Vf8/q771OiRAme\neeIh2re82Xl55yxk9/6fsNlsxAzoTcO6dXLyfreVeW+vsPJG0vPeu9gWu4dBoydRK7QqAHVqhDJi\n4NNOywswce7r7N6vdOb+vWhYN0cWuu67rcxb+l5O5nvuZNvuHxg0erJd5mqMeMZBEmw3vJjTdFLX\niGUjTlFKrXZ1lrxs372HQ8eOs2LeDA4cOkL05OmsmKcdEBkZGYydMZdVi+bgV64cvYa9SFRkS8qU\nKcOcN5fxweuzSU8/x+w3ljqtk9q++wcOHTvBilenceDwUaInz2DFq9Ny8s6cz6qFM/Er50Ov50cR\n1UYr5Js3bsCsMVd0ajgw816dee5UnXnKLFbMnZqTedZrrHrtFSvzS0S1aZGT+aXhLslc3DGd1DWi\nlHrT1Rkux9adu4mK1H/INUOrkpp2hjNnz+Lt5UVySio+3t4E+GkhZcum4WzeGcsNpcvQqlk43p6e\neHt6MnbYQOfl3RWX/Udcs9qNpKad5czZdLy9PK28Wl0O0LJJYzbv3E1IxWCn5cuPSzOfuULmRmze\nGefczOZwr/hiVUDt0B6/MCAG6A7UB3qgHXkPAhnAh0qpaSKyCPhcKbVSRF4HvgLqAglKqTkiMhO4\nGS3+fBr4CVgCVEHr1kcrpT6xFF1fop2CQcBdSqkjhf0Z45OSCauTc+gR4OdLfFIy3l76D+fsuXQO\nHT1OSKUKbIuNI+ImbaY/d/48fYaPIvXMGfo/1pOWTcMLO9oV8tayy1vOyuup86af49Cx44RUrMC2\n2B+IuKkhIRWDOXD4KH2ix5CSeoZ+j3andTPn5M3JnKOwymlj+8wnCK
kYzLbdeTLHjCMlNY1+j3Rz\naubiznXTSVnUBiKBJ4EXgHDgUbSPrxyQJUzbJCLvA8+hHXxHgBCl1HIRGQ0gIlHAjUqpFiLSFngA\n7QVcq5RaIiI1gPeBT6x9piqlOorIJOBenODis7+wz2azMSl6KDGTp+Pt5UWVShWz159OTWPOuJGc\nOHmSRwY+z9fvv4XN5uxL9nIPp9hsNia9MJiYyTOtvBXIzMwktEpl+j3Snds7RHL0xB88MjiaNcte\no3SpUk7PqzPnaePhg4iZMgtvL0+qVLQyh1Sm38PduL1DG5352RjWLF3gmMxuOCZ1vV2CsEMplQn8\nDuxRSl0ETgKN0B3YeuvHBwhVSiWiDcYfAwPy7KsJWj6KUmqjUmoEkAw0F5FN6Ioq0G77LIneMbRd\nudAJDgwgPikpe/lUQhLlAwOylyNuasSyOdNYMHkM3l6ehFSqQGCAP+EN6lGyZAmqhlTGy7MsSadT\nHBHvMnmTc/ImJlI+0N8ub0OWzZ7CgkmjdN6KwVQoH0TnW9pis9moGlKJoAA/TiUkOiUvQHBQAPFJ\np+0yJ+XJ3IBlsyaxYOJIvL2zMgfS+ZZIu8z+Ts1c3LneOqm/L/M4APhUKdXe+mmolNporasInAHy\nDixc5NL2e9DaVyRaL3+593ZImdK6eVPWfqNll/vULwQHBeDt6Zm9vtewF0lMPk36ufNs2LyNVk3D\nadO8Cdt2xZGRkUFySirp5847TWfeunkT1n6zSef9+VeCAwNz531ulF3e7bRqehMff7meRctXARCf\nmExi8mmCgwLz3b9DMjcLZ+3GrMwHCA7M08bPj7bL/L2VeQOLVujzLPFJDs6cmeH4HydzvR3uXY6d\nQAcR8QTOoQ/FhqM7qE5AR2CFiLS2e8331jZTRSQcfQj5G3BQKZUhIvcCpZ34GWjSsD5hdWrTrc9g\nPDxsjBzcn1Wfr8XHy4tb27am653/4Ykh0dhsNnr3eAB/a4C3U7s2PPD0IABeHNgHDw/nfHc1aVCP\nsDq16NZvKB42D0YOeppVn3+Fj7cnt0a2ouudt/HE0BFW3q74+/nSofXNDB07la83beWvv/9m1OC+\nTj3Uy87c/zk8bDZGDnyaVV+sw8fLk1sjW9L1jk48MWwUNhv0frAL/r7l6NA6gqHjpvH1pm389dff\njBrUx2WHp8WR60azbg2cN1BKDRWRO4EuSqlHsx4D24HH0RXSh0qpiSLyCTBBKbVZRMYAaegB8ayB\n82lAhPUWfa31HwHxwGJgIHpM6hagv1Jqr4j0B4KUUqOvlLf4adbh4Pb1Lk5ScKpHtAfg4LavXRvk\nGrFVlitW4RmxXzn8D9ojPMqpA5bXTSdV3DCdlGMxndQ/x9mdlDncMxjcCTe8Tup6Gzg3GAzFDFNJ\nGQzuhBsO35hOymAwFCoi8grQAsgEBiqlvrdb91/gReACsFwpNedq+zOHewaDO+Hi66REpB1QWynV\nEngCmGW3zgOYA3QG2gJ3iUiVq30k00kZDIbCpCPwIYBS6kfAX0Syrg4OAk4rpeKVUhnAOiDqajs0\nh3sGgzvh+jGpiuiLo7OIt55LtR77iEht4BDQAdhwtR2aSspgMDiS7GuqrPtmH0Ff6LwaOEgBbhEz\nlVRRxQY46faUf40NuHiRzORTrk5yDVh/GyWceueS43H9dVIn0JVTFpXRN/QDoJT6Bn1vKyIyEV1R\nXZFi8ldgMBiKCWvRt5khIk2AE0qptKyVIvK5iASLiBdwF3qOtitiKimDwZ3IcO2YlHWf604R2Yye\nQLJfnim3F6I7skxgolIq4Wr7NJ2UwWAoVJRSeSdzj7NbtwpYdS37M52UweBOuH5MqtAxY1IGg6FI\nYyopg8GdMJWUwWAwOBdTSRkMboQ7TmJpOik3Y+KseezeZ2nLB/bJo1nfzLwllmY9qj097/svKz/5\nnP/7Yl32NvvUz+z68iPn5V38Ln
HqN2w2iH7iQRrWrp6Td1ss81d+TOmSJekceTM9OncEYOqS99i5\n/xcuZlyk17130KllU6flheKnsi/uFKtOSkTuU0p94Oocl8N+7nRXvP/22D1aAb5gptasT5zGigUz\nAUsB/socVi16FT/fcvQaGkNUZCu63Hk7Xe68Pfv1X3z9jfPy7lUcPnGS5ZNjOHD0BDFz3mD55Jjs\nvOMWLuWDaaPw8/Gm99hX6BgRzuHfT/LLkeMsnxxDcuoZ7hsy2qmdVJFX2ZsxKdchIqFo47DhMmzd\nGUtUZCsgS7OexpmzZwFITknRmnV/Pzw8PLRmfUdsrte/+uZS+jzaw3l59+yn481NdN4bK5N69ixn\n0s/pvKln8PHyJMC3HB4eHrRoVJ8te/bTrL4wY1hfAMp5eZJ+/gIXLzrvD/NyKnsgl8o+u413xrJl\nR2y2yj44KNCpKnt3oDhVUnOBCBHJAJYC1dHTPCymAFpzIAV4Dyhj/fQDfrD2VQ3YDNyvlKoiIvXR\n895kog0wjwJ+aOHnAaAxEKuUelJEGgJvAUnWOgBEpB+XattHAzWs7O0tOWmhEZ+YRJjYa9b9iE/M\n0qz7aQV4lmZ9124iwhtnb/vDj4qKweVzyUQdTcLpFMJqhubkLedDfHIK3p5lCfD14ey58xw6cZKQ\n4EC2//AjzRvUpUQJDzxLlAHgg3Ubadu0ESVKOO+7tsir7M2YlEuZCvQH9gJ1lVKRIhJMwbXmR4Bj\nSqknrG3rAP8BbrBU6XcCg6zXzgaeUkr9IiJ90R3aMqApWqd+CjgmIn7ACHTn+H8iMg9ARKqj71/K\nq20HKK2UinREA+XlEgV4zDBiJk7D2zu3Zh3g/Y8/557bOzkj1mXJm3fiM0/y4pzFeHuWJaRC+Vx/\ngOu2xfLBV9/y+qghroiaTXFT2RdHis3hXh62W7+vRWu+BWgpIvOBWkqpL4B6WKp04DNyLMMRwEKr\nInsIqGA9/6tS6g9rwq4T1n7ro6swyJkbJ4J8tO15shc6wUGBxCfaa9YTKR9kp1kPb8SyV6ezYMpY\nvL29CKlUIXvd9tg9hDes76ho+ecN8CPBTul+Kvk0wQE5BvqIBsLSCS8w/8VB+HiWpXJwEADfxe5l\nwcpPWDBiMD5enpfs16GZi7rK3g0NxsW1k/rT+l1grblS6nf0YdoqoI+IjETP15HV6pnWD0A60MFS\nrrdUSj2Tzz6xXm+/j6z2/JPLa9v/xEG0jmjK2g26b9aa9Tza8iHRJCYnk37uHBs2baVVMz0edDIh\nEc+yNzjdqtv6pgas2bJD5z1wmGB/P7zKls1e33vMdBJPp5J+/gLrv4+jVeP6pJ1NZ+qS95gXMxA/\nH2+n5oXip7J3B4rT4V4Gl+YNooBacxGJAkoppT4Xkf3Aq+ixpC7WJp3s9h+HPhT8XES6oWcUPED+\nKKAZsAY90yDomQkn56NtdyhNGoYRJnXo9vQgrQB/tj+rPlurFeDt2tD17s48MfgFrS1/qFu2Zj0+\nIZFAfz9Hx7uE8Lq1CKtRje7Dx+NhszGid09Wf/0d3p5lubVFU7rc2o4nX5qm897XGf9yPry3dgPJ\nqWkMfnle9n4mDXySyuUDr/BOhUeRV9m7eBYER1BsDMYiUh79x/8BcMDSnIdSQK058CZ6kPxvdIc3\nCvgeXVmVQx+q9VZKBYtIPeA1a7tz6IqtHLBSKdXMyrMD3cEFAG8Ax4HfAG9L396XS7Xto7EU7Vf7\nvKFVq2Qe3LnpapsVCao3bQ0XL/LblytdHaXA1LjtAQCKiyU6C1uF6lccyLq4YbnD/6BLtO9mNOvO\nQkQC0Id1H4hICLBOKVXX1bnAdFKOxm07qfXvOL6T6vCg0aw7kTTgfhEZhh5PGuziPAaDIQ/XdSel\nlPoLfUmBweAeuOGRUXE9u2cwGK4TrutKymBwO8y9ewaDweBcTCVlMLgTZkzKYDAYnIuppAwGdyLD
\n/cakTCdVZMm6LbA4YIMSJfGoWP3qmxYVsgaYzyS7Nse1UqEYtXEhYTopg8GdMGNSBoPB4FxMJWUw\nuBPmOimDwWBwLqaSMhjcCTcckzKdlMHgTrjhJQjmcM9gMBRpTCVlMLgT5nDPUNTRmvUfLc1633w0\n68vsNOv/szTrX2VvozXrHzst74QZc4nbtx8bNqIH96dR/ZyJUb/a+B3z3lxK6VKluCPqFnp2vYez\n6ed4fsxEUtLS+OvPv+j3xMNEtohwWl6Aia+9xe6ffsVmg5inHqFhnZrZ69Zt2cG85at1G7drSc+7\nbuPc+Qu8MH0eCadT+PPPv+jT/V46WFJUw9Upkp2UiFQEXlJKPZXn+ZeBvUqpN+2e87aeCxWRQ0AD\npdQZJ2TsopQqUvPlbo+N0wrwBbM4cOiwpVmfBeSnWY8mKrJ1Hs16nHM167t2c/joMVYsnKvzjp/C\nioVzc/JOm8XqN1/TeZ99nqh2bfjqm++oXvVGhvTtxcn4BB7p/yxfrHjLeZl/2M+h43+wYvoYDhw5\nTvSMBayYPiYn87w3WDV7In4+3vQaOZmols3Yte9nGtSuwZNd7+b4yXgej5nguE7KXILgHCy33VNX\n39KlONz+cq3k1qxXy6MAT8HH2yuPZn1XrtdrzXpPp+XdsmMXUe3aZOdNSbXTwp9OoZydFr5FsyZs\n/n4n/n6+nE7VzrrUtLRsG4uz2Lp7H1Etm+nMVUNIPXOWM+npOnNqGj5eXtlq+JaNw9gcu5fO7Vry\nZNe7AfgjIZGKQc6zRLsDLqmkRORRoB3a4hIGxADd0aLNHsBJLDOLiPQEnkdLPs8Be0WkHNoacwPw\nXT77rwwsQiuuLgJPKqWO5NlmPNrXVwKYo5R6V0Qao3Xuf6FNMV3R86AvBSqh9eyjgIZAYxFZpZS6\n9zL7ehPt2AsEPkbbjMsDAkxVSi36N22YH/GJyYRJnezlAD/ffDTrxwipVJFtu+KICG+Uva1LNOtJ\nSYTVtcvr70d8YpLO6+/H2fT0nLw7dxPR5CZ6P9SdVZ9+wa1depCadoYF0yY6LS9AfPJpwmrl3D8X\n4OtDfFIK3p6eBPiW4+y5cxw6/jshFcqzbc9+IhrlCFe7DRnJyYQk5o8e5riAbqi0cmUlVRu4G5gI\nvICWe05Ed1YAiIgNmAB0tLatZa3qiT7EiwR257PvscA0pVRHtPNuhP1KEYkEqiml2qL1Vy+KSFkg\nGBiglOqANhv3QHdIQda2twEBSqmpQIrVQV1uXwBJSqn7rMcN0br3/wEDrrm1/gGX06z3jx5tKcBz\nttWa9ducEeuyXJJ3xHCix0+h//MjqFK5ImRm8n9ffEnlisF8uXIZS+ZMY8y0mS5MnHuc2mazMWlI\nH2JmLKD/2OlUqVg+12daPm0Mr44cyrCpc7meLU3Xiis7qR1KqUzgd2CPUuoiuoKyr98DgTSl1ClL\nmpDleMpPbW5PK2C0pUl/gdz69az1Laz1a9DtUMl6/wki8g26swwEfgJ8RORtdCe0vID7gtxK9S3W\nZ8zSvhc6V9esN2bZq6+wYMq4fDTrcc7XrAcFkZA3b2DOP1VEk5t4Z/4sFkybiI+XNyGVKrJrz17a\n3NwcgLq1a3EqIZGLFy86L3OAP/HJp3MyJyVTPiBHrBrRsD7Lpo5mwUvP4e3pSUiF8uz95Td+j08E\noF7NUC5ezCApJdUxAY1mvVD5+zKPbXke27eKRz7P5/cZ/gS6WorzSKXUvfmsX2SnQa+nlPoNmAnM\nVEq1AxYAKKXSgRbWcmfg9QLuK2vd1T5jofHPNesJeJYt63zNekQz1qzfaOX9Wef1ysn75ODnSUzS\neddv2kzL5k2pViWEuH0/AnD89z/wKluWEiVKOC9zk0as/W6bzvzrQYID/PH2zFHD9xoxicTTKaSf\nP8+G7btodVMDduz9iTdWfQJAQvJp0s+fx7+cj9MyF3eK5Nk9
OxIBXxHxA84CrYEt5KjNPyBHbW7P\nNvRh1TwRuQWoqJR6J8/6l0VkMnrcaqpSagB6jOyAiJRBd0hbRaQJUF8ptVREtgFZNkmPq+zL6WjN\nem26PT3Q0qwPYNVna7QCvF0but59O08MHp6PZj3JJZr1Jo0aEFa3Dt169cfmYWPU0IGs+vQLnbd9\nJPf/9w4eHzQMGzZ6P/wgAX6+PPC/u4geP5mefQby98WLjH7OuarEJvXrEFa7Bt2GjMTD5sHIvo+x\n6stvtMq+VXO6/ucWnnhxIjag9/3/xd+3HN06RxEzYwE9ho3m/IU/GdH3Mcdp1t3wMLJId1JKqQxL\nTQ5QHbEAACAASURBVP4NcAjYa616C1gtIuvQA+d5/2VGA2+ISHdr3aN59rtZRNajOzwb8Kq1ajbw\nIXDAejwHfQjXU0SeQg/CT7W2jRWR7UqpiMvsyyUM6fNkruW6tXOu4enULpJO7SIveU2DunVYOG2C\nw7Plx9C+vXMt161dK/txp/Zt6dS+ba71Xp5lmTl+tDOiXZYhj3XPtVy3RrXsx51aR9Cpde7rtm4o\nU5ppz7vke8stuK4160WZ0Ko3Fi/NOnBo9zYXJyk4oY30ZQQHv/nExUmuDVvNJlfWrK+a6XjN+r0D\nnTplbJG8TspgMBiyKNKHewaD4Rox10kZDAaDczGVlMHgTph79wwGg8G5mErKYHAn3PBsvamkDAZD\nkcZUUgaDO+GGZ/dMJ1WEsdmKi2YdyMwk889zrk5RcLLa9gYv1+YwXBXTSRkM7oQ5u2cwGAzOxVRS\nBoM7Yc7uGQwGg3MxlZTB4E6YMSmDwWBwLqaSMhjcCTe8TspUUgaDoUhjKik3Y8LMV4mzNOvRg/rS\nqJ6dtvzbTcx7cxmlS5fijo4d6Nnlf7z/8ed8tObL7G32/vQzsV85b7bKiXNeY/e+n7QWfsBTNKyX\n4+Fb990W5r21nNKlS9H5lnb0vPcuAD7+cj2vv7uSEiVK8MzjPWnf0sma9bmvs/tHhQ0bMf170bBu\n7ZzMm7Yyb+l7WrPeIZKe99zJtt0/MOilydQKrQpAnerVGPGMg9y3bjgmZTqpAiIi9ymlPnB1jiux\nPTaOw8eOs+K12VpbPuFlVrw2G7AU4NPnsHrxPK0tH/ICUW1b0/Wu2+l6V45m/XNnatZ3/6C18POm\nc+DQEaInz2DFvOk5eWfMY9Xrs/Er50Ov50YS1aYlZcqUZs6by/hg4SzSz51n9uKlTu2ktsft5dDx\nE6yYM5UDh48SPXUWK+ZMzck86zVWLXhFZx7+ElFtWgDQvHEDZo0uctLrYoE53CsAIhKKnbS0qLJl\nRyxRkXq+8Zqh1UjJo1kvZ6dZ19ry3Jr1uW8spa8TNetbd+4mqk1LK29VUs+c4cxZS1mekqq18H6+\nWlnepDGbd8ayZWcsrZqG4+3pSXBgAGOHPeO0vABbd8UR1Vp3PDWr3Wip7C+XuRGbd8Y5NR+ZmY7/\ncTKmkioYc4EIEclAK9erA1HAYqAK4AWMVkp9YklCv0SLRIOAu4AU4D20pr0M0E8ptSvvm/xbtLY8\n59Djypr13USEN87eds+PP1HJyZr1+KRkwiTHDhPg60t8UhLeXp4E+PlqZfmx44RUrMC22D3ZWvhz\nFy7Q54WXSE07Q//HetCy6U3OzVwnx8AT4OdLfFJyTub0cxw6doKQisFs2/0DEY0bElIxmAOHj9In\nZhwpaWn0e7gbrZuFOy1zccd0UgVjKtAfrdSqq5SKFJFgYK1SaomI1ADeB7IGc1KVUh1FZBJarX4E\nOKaUesLatk4+71HoZNqZvmw2G5NefI7oCS/j4+1FlUoVc30rrvz4c+7p3MkZsS7LJXlfGELMpBl4\ne3taWni9/nRKKnPGjeDEyVM8Mmg4X7/3pstuxr5EDT98EDFTZ+Ht5UmVihXIzMwkNKQy/R7uxu3t\n23D0xB88MiSGNW8vcIyM
NcP9xqTM4d61k6VOTwaai8gmYAm5Ve5ZAtEspfoWoKWIzAdqKaW+cESw\n4KBAEhKTs5e1tjy3Zv2deTNYMHU8Pl5ehFSqmL1u2644whuGOSLWFfIGEJ9knzcpd96bGrJszlQW\nTHoJb29PQipWINDfj/AG9ShZsgRVQyrh5VmWpNMpzsscGEB8kp1mPTGJ8oH+OZkbN2DZzEksmDAS\nby9PQioGU6F8IJ07RGKz2agaUokgf39OJSQ6LXNxx3RS106WOv1BIACIBO7Js00upbpS6negMbAK\n6CMiIx0RLLe2/JdLteVDXsjWrK/ftJWWWZr1+AS8PF2gWW/ehLUbvtN5f/6V4KCA3Fr4YSNITD5N\n+rnzbNi8nVZNb6JN8yZs2xVHRkYGySmppJ87h79vOedlbhbO2o2brMwHCA7Mk3n46JzMW76nVdOb\n+PirDSxasRrQh4uJyacJDgrMd///GjMmdd2SwaVtFQQctCzL96IV6/kiIlFAKaXU5yKyHwdZjps0\nDCOsbm26PfWM1pY/+wyrPl2Dj7fWrN9/V2ceHzQcmw16P9SdgCzNemISAa7QrDeor7XwfYfg4WFj\n5KC+rPr8S61Zb9uKrnf9hyeGvqiV5T3uz9bCd2rfhgf6PAvAiwP7OE5Znm/meoTVrkW3/s/pzAOf\nZtUX67RmPbIlXTt34onnRuk2frAL/r7l6NAqgqHjpvH15m389dffjBrUx+lfCMUZYzAuACJSHtgJ\nfAAcUErNsc74fQTEowfQB6LHpG4B+iul9opIf3Rn9iZ6wP1vdIc3Sin1bd73sSe06o2Zh3ZtdswH\nKmRCm7SCzEwOfu+8yxf+LdWba337wa1fuzjJtWELkSsbjBeNdLzB+IkxTh0ANJVUAVBKxQNV8zx3\nCGhk99Qy6/cYu23m2K1v46h8BoM7Yzopg8GdcMMjIzNwbjAYijSmkjIY3AlznZTBYDA4F1NJGQzu\nhBmTMhgMBudiKimDwZ1ww/mkTCVlMBiKNKaSKsoUJ836xb/IPKpcnaLg2PT3s+0GbxcHKWTMHOcG\ng8HgXEwlZTC4E244JmU6KYPBnTCXIBgMBoNzMZWUweBOmErKYDAYnIuppAwGd8LcYGwwGAzOxVRS\nbsaEma8St3e/pVnvR6P6dpr1jXaa9agszfpnfPTFV9nb7P1JEbvuU1dEZ+LS1cT9elhn73kPDWvm\nTIa6bucPzP/wS0qXKknnFuH06BTpkowAE2bOJW7vj9hsED2ofz5tvNSuje+x2theZa+IXfeZY8K5\n4ZjUdd9JicijQAOl1NACbj8aSMgzNTAi8n9Kqf8WfsKCsz02jsNHj7Fi4RytWR8/lRULdUytWZ/N\n6jfma836s1ma9c50vatz9us/X7fBNdl//JXDfySwfPQgDhw/SczCd1k+elB29nFLVvHBuCH4eXvS\ne+prdGzakIqBzpdH6DY+foU2nsXqNxZYbTycqLZtikwbF1fM4V4h4eoOCmDLjl1Etb2MZv10CuW8\nve006+Fs/n5nrtfPXfw2fR97yOm5Abbu+4WOTRsCUDOkAqlnz3Em/TwAyWln8fEsS0A5b509rDZb\n9v3skpyXtnHaFdq4ST5t/JZj29gordwXEemHdullAB8qpaaJSDhaP3XB+nnA2ry5iKwFKgNDlVJf\niEiCUirI0leNRfv5koH7gVZoA3IGUA9YqZR6qbA/Q0JiMmGSI0cO8PclPjFJa9b9/Tibnn55zfr+\nn6hUwbmadXsSUlIJq14leznAx4v4lFS8PW8goJw3Z8+f59Af8YQEBbB9/680r1frCntzYM7EpDxt\n7HeNbRzssjYurphOSlMdaEqO0WWTiLwPPAa8qpR6W0RuAbKUv8FKqU4i0gBtL7Y3EvsDDyqlDorI\nW8BtQBoQAdRFV6+HgELvpPJi/6Vns9mYNOJ5oidMxcfLiyqVKmFnNWflx59xT+fbHB2pwN
h/X9ts\nNiY+9SAvvvYu3p5lCSkfWGTGXi7RrI8YbtfGeVX2TmjjItIuhYnppDRNgFLAemvZBwgF/g+YJyJ1\ngBVKqZ9EBGADgOXWuzHPvuKB10WkJFAD+BrdSe1SSqUDWPsodIKDAklISspe1pr1HFOu1qzPBGDa\nvNcJqVQhe9222DhefHaAQ3IVhGA/XxJOp2Uvn0pOIdgvx0wcUa8WS0c+A8D0FZ9QubxrqpFra+OF\nuVX2sbtd2sbOQkReAVqgv2sGKqW+t1vXD+gJXAR2KKUGXW1/ZkxKkwF8qpRqb/00VEptVEqtA5oD\nPwFLRKSDtb3911Xer67FaDloO3Qnl8XfOJjWN9tr1n++VLP+7HASkyzN+ndbaNm8KWBp1ss6X7Nu\nT+uGwprv4wDYd/Aowf6+eJW9IXt97ykLSExJI/38BdbH7qNVgzqX25Vjcxb1Ns7IcPzPFRCRdkBt\npVRL4Alglt26csAwIFIp1QaoLyItrvaRTCWl+QboICKewDlgBjAc3cifKqWWiYgNCLe2bwNMEZFG\nwOE8+/IFjoiIH9AB2OOMDwCWZl3q0K33AGweHowa8gyrPv0CH29vrVm/+w4eH/w8Nmz0ftj1mnV7\nwutUJyy0Ct1fmomHzcaIR+5j9cbteJe9gVubN6JLh5Y8OXk+NpuN3ndF4e/jmnmgmjRsoNXwvftb\nbTzQamMvbm0Xyf13d+bxwc9Zbfxgnjb2d0lmJ9MR+BBAKfWjiPiLSDmlVCp6nPZPwFtEzgCeQNLl\nd6UxnZQmCd0xbUSXoR8qpc6JyK/A+yKSgh44fwzoA5wSkY/Qh3MD8+xrLrAJ+BmYAowGop3xIQCG\n9u2Va7lu7ZrZjzu1j6RT+0uvL2pQtw6vT5/k8GxXY0i3u3It160Wkv24U/NGdGreKO9LXMLQvr1z\nLedu47Z0at/2ktc4rY1dPyZVEbA/pRlvPZeqlDovIi8Bv6GLgeVKqauepr3uOyml1Jt2i6/mWfcF\nuQfFQXc6+e0nyPo9Ehhpt2qJ9fvdvNsaDNcB2dPLWod70UAdIBX4WkQaK6XirrSD676TMhjcCtdX\nUifIOQsO+jKd363H9YDflFIJACLyLfqs+hU7KTNwbjAYCpO1QBcAEWkCnFBKZZ22PQTUE5Gy1nIz\n4Jer7dBUUgaDO+HiWRCUUptFZKeIbEafNe9n3XqWopRaLSJTgfUi8jewWSn17dX2aTopg8FQqCil\nhud5Ks5u3QJgwbXsz3RSBoM74foxqULHjEkZDIYijamkDAZ3wlRSBoPB4FxMJWUoHEqVwUOauzpF\nwbmob6XMOHnItTmuEY/AkCtvYCopg8FgcC6mkjIY3AljizEYDAbnYiopg8GdMGNSBoPB4FxMJWUw\nuBOmkjIYDAbnYiopg8GdcMNKynRSbkZx06xPmD6LuL37sGEjeshAGoXVy8n7zbfMW7RE5+0URc/7\n7yMjI4NRE6fyy4GDlCpVktEvDKNmaDWn5QWYuPhd4tRvWrP+xIM0rF09e926bbHMX/kxpUuWpHPk\nzfTo3BGAqUveY+f+X7iYcZFe995Bp5ZNnZq5OGM6KTtE5D6l1AcF3HYGMFMpdfAy6w+h9e1n/ul7\nXCvFTbO+fWeszrt4AQcOHiJ67ERWLF6Qk3fKK6xeugg/X196DRxKVLtIftj/I2lnzrJ88XyOHDvO\n+GkzWfDKFOdl3qs4fOIkyyfHcODoCWLmvMHyyTHZmcctXMoH00bh5+NN77Gv0DEinMO/n+SXI8dZ\nPjmG5NQz3DdktMM6qUxznZT7IiKhQPeCbq+UGnS5Dqqw3uNaKW6a9S3f7ySqnRZD1KweSkpqGmfO\n2OX18SbA31/nbd6Uzdt3cOjosexqq2qVEE78/gcXL150Wuate/bT8eYmOvONlUk9e5Yz6ed05tQz\n+Hh5EuBbTmduVJ8te/bTrL4wY1hfAMp5eZJ+/gIXL7
pfZ+IorttKSkSqAkvRdpiSaC9eAxEZie68\na6DNxlFol14VwAsYrZT6REQ2oNXpp4H30aqejWinWHvrbfqLSGdr/7ehTTIRIjJSKTWmsD9TcdOs\nJyQmElYvR5SqleWJeHvb5T1ylJDKldi2YxcRTcORWjVZ8u57PNL9fg4fPc7R4ydIPp1CkJNyJ5xO\nIaxmaE7mcj7EJ6fg7VmWAF8fzp47z6ETJwkJDmT7Dz/SvEFdSpTwwLNEGQA+WLeRtk0bUaKEg+oD\nNxyTup4rqS7Al0qpDmgt1RrgG7vOo7RSKhLt0VtryT7v51I9+mDgPWt9mTzr9iql2qLdfB2BqXne\nw6FcTrPef/jIIqlZv0RZPiqG6LET6T8smiqVK0FmJu1at6RhWD169O7PkuXvUaN6tVyvc3Xmic88\nyYtzFjNg0hxCKpTP9Y+wblssH3z1LSN69XBF1GLLdVtJoSeMX21JPFcCW9ETw2ex3fqdDDQXkd7o\nOZsDyU09YIX1+CMgwm7dd9bv4+jO7nShpc+H4qZZDy4fREJiYvbyqfgEygfl2L4imobzzkJtGZs2\nZz4hlSsBMLhPjvcu6n/3ExjgPOlmcIAfCadTspdPJZ8mOMA3ezmigbB0wgsATH97JZWD9ef5LnYv\nC1Z+wmsjB+NjZzwudEwl5T4opfYCjYFvgYlA1Tyb/Gn9fhAIACKBe/LZlQ3decGlyvW/82znUIq8\nAvySvBGssQbq9/2kCC4flDvvM0Ny8n67iZYRzfjp5194YcwEADZu3kr9unXw8HDef+PWNzVgzZYd\nOvOBwwT7++FVtmz2+t5jppN4OlXr4L+Po1Xj+qSdTWfqkveYFzMQPxeZl4sz120lJSLd0A6wD0Uk\nARiDFhbmJQg4qJTKEJF7gdJ51h9AV2A7gNuv8rYZOLDNi5tmvUnjhoTVFbo9/jQ2DxujnnuWVR9/\nppXlHdpx///u5vH+g7Va/dGHCPDzw69cOTIzMunySC/KlCnNy2NGXv2NCpHwurUIq1GN7sPHax18\n756s/vo7vD3LcmuLpnS5tR1PvjRNZ76vM/7lfHhv7QaSU9MY/PK87P1MGvgklcvnLcoLATespGyu\nPJ53JZYTbD5wBj14PgZYBnwApAAJSqk51hm5j9C66MXo8atPgFvQA+cXgPeARGAb0EIp1dH+EgQR\neRnYC3yKVlB/oJQafKV8oVVvzDwUu6UwP7LDCA1vCTYbh37YefWNiwih9bSy/bcvV7o4ybXhUb/1\nFSvyv2N6OPwPuuT4ZQ4/Ksj1fs58s6KEUmoXuceP4NJDPpRSh4BGdk8ts36PARCRMKC/UmqTiHQH\nyluvC7Xbx9ArvYfBUGi44XVS120nVYikAQtEJBN9OPeYi/MYDG6F6aT+JUqpI0AbV+cwGAC3HJO6\naiclIt7Ao0B99NmrPcBbSqlzjo1mMBgMBauklgNJwCb0afRI9Fms/zkwl8Fg+Cdcj5UU4K+UutNu\neb6IfOuoQAaDwWBPQa6COygiFbMWRKQC8IvjIhkMhn9MZqbjf5xMQSqpasABEdmH7tTqAvtFZCOA\ndW+awWAwOISCdFIvOjyFwWAoHK7T66RK5PekUurrQs5iMBj+LdfpwPkIu8elgTD0mT7TSTmaks69\n4fdf45Hv91kRxfpjvviXa2MYrspVOylrvqVsRCQYPWuAwWAoarhhJXXNc1wopU6h51AyGAwGh1OQ\nK87fJvc8STeiZw0wGAxFDTespAoyJvWV3eNM9JxLax0Tx2AwGHJz1cM9pdQS4Bv03f5pwC6lVLqj\ngxkMhn9ARobjf5zMVTspEXkaWA90A3oAG0TkEUcHMxgMBijY4d5DQD2l1HkAEfFCHwIucWQwg8Hw\nD3DDMamCnN37O6uDAlBKnSVHUmAwGAwOpSCV1FERmQ18aS3fBhxxXCTDv2HC9NnE7d2PzQbRQ56h\nUf2cq0W++uZb5i
1+m9KlS3HHrbfQ8/77OJuezvOjx5OSdoa//vyTfk8+RmTLvLMqOzDvtBnE/bBP\n5x06mEZh9XPybtjIvEVvUrpUKe64LYqeD3QlIyODUROm8MuBA5QqVYrRLzxHzeqhTssLMPGN94j7\n5Tds2Ih+/AEa1sp5/3XbdzP/g88oXaoknVs3p8ft+jLDn48cp//kV3nkzqjs5xzCdVpJ9UZ74x5D\nT3532HrOLRGRQ9ZEf/bP/UdE+rgqU0HZvms3h48eY8XieYx/8XnGvzwre11GRgZjp85g4YwpLFsw\nm/XfbuaPk6dY/cnnVK9WlbfnzWTmpLGMnz7rCu9QyHl37uLwkaOseHMh40fGMH7qK7nzTpnGwlnT\nWPb6PNZv3MQfJ0+xbsNG0s6cYfkbCxk/IpopM2Y7LS/A9n0/c/j3UyyfMJxxfR9m/OLluTKPW7Sc\nBdEDeHvMUNbv2MMficmkn7/A+EXLadGwrlOzugsFqaQeUEpNcniSIoxS6gtXZygIW77fSVS7SABq\nVg8lJS2NM2fO4u3tRfLpFMr5eGerq1o0b8rm73fi7+eH+vU3AFJT0/D3873c7gs/7/YdRLVvl5M3\nNdUu72nKeXsT4K/Fny0imrF52/ckJidnV1tVb6zCid//4OLFi5Qo4Zxbcrb+8BMdI27SmatUIvVM\nOmfSz+HtWZbktDP4eGndOkCLhnXZsudH7mp7MwuiB/D6h2scH9ANK6mCdFL3isgqpVTK1Td1PSLy\nKHrO8fKAoNXmB4AJwF/AMeBxoHve7ZRSi6zdRItIJFrueQ96FtIGwBz0CYMDaLForFLqSRFpDMy1\n9p8BdAXKAUvRyqy5wP1KqYesjAuBj5VSHxXmZ09ITCKsbp3s5QA/P+ITk/D29iLA34+z6ec4dOQo\nIZUrsW1nLBFNbqL3Iz1Y9cnn3Hpvd1JT01jwyuTCjHT1vPVyqosAf3/iExOtvP6cTU/XeStVYtuO\nnUQ0bYLUrsWSZct55MEHOHz0GEePnyD5dApBgQHOyXw6hbAaOcKfgHI+xJ9OxduzLAHlfDh77gKH\nfj9JSPkgtu9VNA+rQ8kSJSjppE7UHSlIJ1UWOCQiCrsB8yI+j1RDoBVQGz398Q3ArUqpoyIyB20l\nzsxnu6xOao9SKtry5T2Evj4si6bAA8Ap4JilaQ8GBiilYkVkDPpSjY+BcLTC6jQwTURuQLdha6Cf\noz58Fpl2NwrYbDYmjYomeuxkfLy9qFK5EpDJ/32+lsoVK7Bo1sv89POvRI+bzKq3Fjo6Wv55M/Pk\nfWkE0S+Nt/JWhsxM2rVuya64PfTo1QepVYsa1UNxpTsybxtP7P8oL859C2/PsoRUCLrCKx1ExvVZ\nSY11eIrCZ4tS6qKIHAN8gfNKqaPWuvVAO2BXPtthtw3AdqAt2k6cxa9KqT8AROSE9bqTwGQR8QQq\nk+PmO6CUSrS2/QToDPwOfKuUKvQzpMFBgSQkJmUvn4pPoHxQjiU3oslNvLNwDgDT5i4gpFIltu/a\nTZsWzQGoW6cWpxISnHb4FFw+iITExJy8CXnyNm3CO4vm67yzXyWkciUABvd9KnubqLu7EBjg7/Cs\nWQT7+5FwOkd0fSophWD/nP86EWF1WDpuGADTl612jKX4OqMgA+cl8vnJFJHKjgz2L/nb7nEAWiCR\nRWn0IVne7ey3ybzM47yvyXrdTGCmUqodsMBunX1H9Bb6MPBu4J0rhf+ntG7RnDVffwPAvp8UweWD\n8PbyzF7/5MBhJCYlk37uHOu/3UzLiKZUuzGEuL0/AnD89z/wKlvWaeM7rVtEsGad/j7Y96MiOCgI\nby+vnLwDBpOYlGTl3UTLiOb89PMvvPDSOAA2bt5C/bp18PC45vvk/3nmxvVZs3WXzvzbEYIDfPEq\ne0P2+t7jZpGYkkr6+Qus37GHVo2cey9+Zmamw3+cTUEqqRj04cnP6BuLBa0Kry4i
E5VScx2YrzBI\nRneqVS1HXjvgO6782SPRuvUWwI8FeI8g9BTLZdDV0ta8GyildotICPrQMPraPkLBaNKoIWF169Dt\niT7YPDwYNWwwqz75HB8vL27t0Jb7/3cnjw8Ygs1mo/ejPQjw8+OBe+4meuxkej41gL8vXmT08CGO\niJZ/3saNCKtbl26P9cJm82DU8KGs+uhTfLy9uPWW9tx/z395vN8gbDbo/ejDBPj74edbjsyMTLo8\n/DhlSpfh5XGjnZYXILxuTcJqVKV79GQ8PGyMeLI7q9dvxtuzLLfeHE6XqDY8OXYmNmz0vuc/+Jfz\nZt+Bw0xespLj8YmULFGCNVt2MWvY0/j5eF39DQ0F6qSOAM8opfYBiEh9YABwK/qevqLeSQH0At4R\nkb/Rg97LgZ5X2D7M7pKD0cC9V9n/bOBDa9+z0QPsK/LZbi3go5Ry2NfR0P5P51quW6dW9uNOHdrR\nqUO7XOu9PD2ZOfElR8W5KkOf6ZtruW6d2tmPO93Snk63tM+13sPDg0kvjcCVDOmZ+79D3dAbsx93\natGETi2a5FofVrMab41xUud/nY5J1crqoACUUvtFpL5S6ryIFLkpW5RSb9o9PgOEWot5LcP5bqeU\nCuVS3rR73MzudVmPX7N+slidd1sRsQHtgdy9iMFguCIF6aTSrbNcG9BjOa2A0iJyG/r0uuEqiEgo\n+vDxPaXUry6OY3BnrtPrpLoDg4Gn0APtPwFdAC/06XnDVVBKHUJfumAwGK6RgsxxniQi04A66Erq\nZ6VU6lVeZjAYXECmG45JFWQ+qWeBX9Gn2ecCv4qIwy9ENBgMBijY4d6jQM2s22JExJ/ic1bPYLi+\ncMMxqYJcBXfc/r49pVQy+lS7wWAwOJzLVlIi8rj18IiIfISejTMDuAU9dYvBYChquOGY1NWuus4i\nEX2zLEAK+syewWAwOJzLdlJKqcecGcRQzMnIIDM1wdUpCk4JrbD3qFzrKhsWL1w5I4SjKIgc9CiX\n3mSLUqpqPpsbDAZDoVKQs3v2t5OUBjoCnpfZ1mAwuJLrbEwKAKXU4TxP/SIia4DpjolkMBgMORTk\ncO+WPE9VBWo6Jo7BYPg3XJdjUsAI9JiUDX0JQjr6Pj6DwWBwOAW5mPMt9B38twJl0AICcWQog8Hw\nD8nMdPyPkymod28h2pjyA3repfsdmMlgMBiyKUgndc6SBnQG3ldKZZDPJQkGg6EIkJHp+B8nU5Ax\nKURkLnqe814i0hKtiDIUQYqbZn3i3IXs3q+w2WzE9O9FQztv4LrvtjJv6QpKlypF51va0vOeO9m2\n+wcGjZ5ErVB9mV6dGqGMeMa5Q6QTZr5qtbGN6EH9aFQ/xx341cZNzHtzmW7jqA707PI/3v/4Mz76\n4qvsbfb+pIhd96lTMxdnCtJJ9UB75mZZ+qdQnDAFroi0B/orpboUYNsuSqmVjs5UGFzL57pW7DXr\nBw4eInrsZFYsngfkaNZXv70IP99y9Bo4jKh2kXz1zbdUr1aVIf2e4mR8Ao/0HcQX7y8t7Gj54tVk\nvgAAIABJREFU5939A4eOnWDF3Jc5cPgo0VNmsmLuyzl5Zy1g1Wsz8CvnQ6/nRxPVpgUAzRs3YNZL\nLzgl4yWZY+N0Gy+cw4FDh4keP5UVliYsIyODsdNns/qN+bqNn32BqLat6XpXZ7re1Tn79Z+v2+Cw\nfO54du+qh3tKqd+VUjOUUspaflcpFef4aNfEcFcHKApcTrMO5NKse3h45NKsn07Rcxg6W7O+dVdc\ndsdTs9qNpKad4czZdJ03JRUfby8C/Hzx8PCgZZPGbN6522nZLseWHbuIatsagJqh1UhJO8OZs3Zt\n7G3Xxs3C2fz9zlyvn7v4bfo+Zia0vRYKdLjnQrxFZCn6jOL7wEq0iSUTbRV+FG2CaWyp4O8VkfHo\nm6NLAHOUUu+KyJtoB14g0A0tTaiBPls5Uim1
VkSeR1thMtAK9AmWaj1Lz37Ueq8HgJuVUv1FpCf6\nivzl2FVHIpKglAoSkSi0XPVPtFrLoScciptmPT7pNGF2NpsAP1/ik5Lx9vIkwM9X5z12gpCKwWzb\nvYeImxoSUrECBw4fpU/MWFJS0+j3SHdaNwu/wrsULgmJyYSJXRv7++o29spq43QOHT1GSKWKbNu1\nm4jwxtnb7tn/E5UqlKe8I5Xw1+MV5y6mPlAXXfEdRDvznlJK/SIifYF+SqnxIvK81UFFAtWUUm0t\nB94uEfnQ2leSUqq3iDyMNhq3swSnG9BTIw8FKqHdglmHs7OAjtYUylOArkqpt0XkYRFpAgwCooCb\nLpPfH3hQKXVQRN4CbiO3st2hFHvN+vBBxEyZibeXJ1UqViAzM5PQkMr0e7g7t3dow9ETf/DIs9Gs\nWfoapUuVclHmnMc2m41JI54nesJUfLy8qFKpUq5TTCs//ox7Ot/m/JDFnKLeSe1SSqVDthIqAlgo\nIqCroO/zbN8KaCEiG6xlD3THA1qZDloztQFAKXVCRC6ISAC6SvsKbRdeJiIVgNrAKuv9vICs2/yf\nATYCzyqlTlvr8yMeeF1ESqIrt69xYCdV7DTrQQHEJyXn5E1MonxgjjI94qaGLJulK7tpC5cQUrEC\nFcoH0vkWfUhbNaQSQQH+nEpIpEqlig7PqzMHkpBk18YJiZQPtGvj8Ma8M2+mzjzvdUIqVchety02\njhefHeDYgNfjmJSLyas0Twc6KKXaK6VaKqWeybP+T2CRtb69UqqeUuo3u3WQc/V8FqWBDKVUH3QF\nVZEcfddxu301V0pNsV4TiO5sqtjt056sr/XF6MPAdsD/XcPn/kcUO816s3DWbtys8/78K8GBAXh7\n5uTt9fwoEpNPk37uPBs2b6dV08Z8/OUGFq1YBUB8UjKJyacJtuuIHZ755masWb9RZ1Y/ExwUmLuN\nnx2e08bfbaFlcy0JOhmfgFfZsi6r+IozRb2Syksc8B/gcxHpBsQrpdaR09luA14WkcnozmeqUirv\nV9f3QAdguYjciO6MMkVkpFJqDDBGRNqiD/uwRKj7RWQAem73/cAUoC2wUkTeBVKxKjYRaQT4WO/l\ni57Z1M96zz2F3B65KHaa9Qb1CKtTk279h+FhszFyYB9WffGVzhvZkq533MYTw0bqvA92xd/Xlw6t\nIxg67mW+3rSNv/76m1GD+jr1D79JwzDCpA7deg/QbTzkGVZ9+gU+3t7c2q4N9999B48Pfl5r1h/u\nToB1IiI+MYkAfz+H53NHW4ytqJ6yzHuqXkQS0APir6E7lnPo8Z4kEVmH1pdHWAPnUehq6VWl1JvW\nwPlKpdQn1qHXfPRN0qWBF5RSG0VkNnAzWni6WSn1ooi0Aaahq7ATwMPoQ72SSqmJ1s3XzwJ3A18A\n3sAm4D6lVA0RGWOt+xn4FK1sj7bWX/EShNCqN2Ye+mHHv2xF5xDasBlkZnJw29eujlJgqrfoCMCh\n2C0uTnKNBFaxXWl1epc2Dv+D9lz53RUzFDZFtpO63jGdlGNx207qvtaO76Q+2OTUTqqoj0kZDIbr\nnOI2JmUwGK5AZoarExQ+ppIyGAxFGlNJGQzuhBuOMZtOymBwJ9zwEgRzuGcwGIo0ppIyGNwId7yk\nyFRSBoOhSGMqqaKKzQa2YvIdYrOBzYbNt7yrkxSci38BkHHiVxcHuTY8AqtceQMzJmUwGAzOxVRS\nBoM7YcakDAaDwbmYSspgcCPccaoWU0kZDIYijamkDAZ3ogiMSYnIK0AL9Iy1A5VS31vPhwDL7Dat\nAQxXSr1zpf2ZTspgMBQaItIOqK2Uaiki9dBTaLcEUEodB9pb25VET9P90dX2aQ73DAY3IjMj0+E/\nV6Ej8CGAUupHwF9EyuWz3aPAB0qpM1fboamk3IwJ02cRt3cfNmxEDxlIo7A8mvVFS7QCvFMUPe+/\nj4yMDEZN
nMovBw5SqlTJ/2fvvsOjKvO/j78nFZJJhYQSSgDhBkIRpEgTpLnys+0KioCCFAtgAWSl\ni0IUkCJFAUFApdp4Vt11YUWQ3jvIjQQDoacS0mjJ88eZJJMQAtFMyfB97ZXLmTlTPmHJzfeczJwP\n40cOp0Z4VfvlnfoRBw4dNirLhw+hQUTd3LwbNjJ34WJL3k706t7NyBs5md+jThp5R71NjWrhdssL\n8MHirzjw+0njz7jvs9S/L/f11+3cz7xv/4OXpwddWjWl56MPA3D89FkGT/6E3o91zLnNRZUHrBtR\nYy23Jee7X3+g8908oUxSf4FSqo9Sauptti1RSj1WwO3FXq+ebeeefZaa9flEjh1B5LSPcrZlZmYy\nYcoMFsz8kGWffsz6TVu4cPES637dxJWUVFYumkfk2JFMmfmxreIVkHcvp07HsOrzhUSOG0XklOl5\n806eyoLZ01m2cB7rN2428m7YyJWUFFYuWUDkuNFMmTHbbnkBdh45zqnzl1j5/ggmDnyByEUr82Se\n+NlK5o96jS/fe4v1uw9yIT6RtIyrRH62kgfr17Z9wKws238VzS2nGlZKtQCOaa3zL1wFkkXK/mxW\nCX9LzXpyQTXrQbk16zt3Ex1zJmfaqlIpjHPnL3Dz5k1bRcybd+duOj78kJG3erV8tfBJ+Pv55eZt\n1oStO3YSfTqGBvWMaatK5Uqcu2C/vADbDx2jQzOjC7ZGpQokp6SRkpZuZL6Sgp9vaYID/IzM9Wuz\n7eBveHl6MH/Ua4TaoS3GCZzDmJyyVQTO57vPYxgdl3dFFqlioJSapZTarZT6XCm1XSkVbtn0uFLq\nZ6XUAaVUY6XUcCyV8LbIERcfT5DVD0JwUCCx8fE5l1PT0og+HcP1GzfYsXsvcQkJ1KpRnc3bd3Lz\n5k1ORp8m5uw5EpMu2yLerXnj4gkKyi0DNWrhs/MGkZqaSvTp01y/foMdu/cYeWvWYPO2HZa8p4g5\nc5bEpCS75AWIS7pMsL85N7O/H7FJyTmXU9OvEn3+Itdv3GTnYU1cUjIe7u6U8vayT8DMLNt/FW4t\nkN3w1Bg4p7XOX4jbFKOe7q7IMam/rprlqwkQAeyz2palte5o2e0brbV+OrsS3h7Bbqktf2c0oyZ8\ngJ/ZbNSsZ2XRtlUL9h48RM+XBqNq1qB6taoOO93HLbXw741j1PhII29YRciCtq1asnf/QXr2fwVV\n8z6qVwt36G/d82f+YHAfxnz8BWaf0oSVK+u4YA6itd6qlNqjlNqKUT03SCnVB7istV5tuVsF4NLd\nPqcsUn9dI+C/WutM4JBSKtpq23rLf3cCk2wdJDSkLHGWSQSya9Zzf1CaPdCI5Qs+AWDanHmEVTQa\n6Ie8+lLOfTo+9QxlgnOnG5vnjcuf16qy/IHGLF8038g7+5PcvINeyc37xNN2ywsQGhRIXFLuoZRL\nCZcJDQrIud4sohZLJw4HYPqy1VQMsV+7MjjH+aS01vkPaRzIt71+UZ5Pdvf+OhPGvxjZsu7isk20\nat6MNes2ALepWX99mFXN+hZaNGvCseO/M/K99wHYuHU7dWvXws3NPn8tWrVozpp1Rlffkd+OWfL6\n5uYd/CbxCQlG3o2badGsqZF3/EQj75Zt1K2t7JYXoFXDuqzZvtfIfPI0ocEB+JYulbP9pYmziL+c\nTFrGVdbvPkjLBnVu91TiLskk9ddFAQ8opUxAbcD69/dtgK8w3n37m+U2m/1ENW5Yn4jaiu59X8Hk\nZuKdfw7lux/+g5/Zl04Pt+WZp56g7+Ahlpr15wkODCTQ35+szCy69h6At7cXU98bZ6t4BeRtQESd\n2nTvM8DIO2I4333/o1FZ3r4dz/z9SfoOfMPI++ILBAcFEhjgT1ZWJl2f74u3lxdTI9+1W16ARrVr\nEFG9Cs+Nmoybm4mx/Z9j9fqtmH1K06l5I7p2bE3/CTONmvW//40gfzNHok
4x+fNvOBsbj4e7O2u2\n7WXW8FcI9PO98wsWlRNMUsVNGoz/Asu+dj0gBKiDcTyqOfA4MAG4AoQDlYHntdaHrCvhC3vu8KpV\nsqIP7SnsLk4jvP4DAEQfvetjoQ4XXsuYcE7+d+Ud7ulc3Oq3K7Q9+HK7hjb/gQ7YcMCuDcYySf0F\nWuslSilv4FmtdW+llC9wDDivte5zm8d0sGdGcY+RsyCI/LTWV4GmSqndGAfKx2qtbzg4lhAuQyap\nYqC1fs3RGYQA5/jtXnGTSUoI4dRkkhLChWRl3vk+JY1MUkIIpyaTlBAuRI5JCSGEnckkJYQLccFB\nShYpp1ZSatazZdrvvE5/meXP1uRnvw8niz9HFikhXIgckxJCCDuTSUoIF+KCg5RMUkII5yaTlBAu\nRI5JCSGEnckkJYQLccFBSiYpIYRzk0nKxbw/bSYHDltqy4e9eWtt+aIleHl6GTXrz3a11KxPMWrL\nPTwZP2o4NcLD7Zz3iFVeq1r4DZus8nawyvthvrz2q4UH+GDeEvYf+x0TJka/2of66r6cbeu27mLu\niu+MmvW2rej15N9Iz7jKyKkfE5d0mWvXrvNqj6d5+MEHbJIt0wVHKVmkbEgp9TegmtZ6rj1ez6hZ\nj2HV4gVE/RHNqPciWbV4AWCpLf9wOquXLiYwIIABrw+lY7uHOHT0N0vN+qecPnOGyKkfMf+jApvj\nbZT3DKsWf2rJ+z6rFn+aL+8iS95hVnlTWLloviXvTOZ/9KFd8gLsPHiU6LMXWPVRJFGnzzBq+lxW\nfRSZm/njRXz38WQC/c0MGPMBHVs2Ze8RTb1aNej/zJOcvRhL35ETbbZIuSJZpGxIa/1fe77etl27\n6djOUlteLbdm3Wz2NWrLzUbNOsCDTZuwdecu4hMSrWrWK+XUrLu7u9sp76218Ebey7fJm5QzHdo7\nL8D2fYfo2LKpkblKJZKvpJKSmobZ14fE5Cv4mX0IDvQHoMX99di67xD/6Nwu5/EXYuMpXzbYZvlc\ncJCSY1LFSSnlqZRarpTaopRap5QarZSaarl9lVJqo1Jqh2XCKnZx8fEEBVrXrAflrS23rlnfs5e4\n+ARq3ZevttyeNevxCQQFWtWsF1YLv2cvcfGJ1LqvusPyAsQmJhEc4J+bOcCf2MSknMup6RlEnz1v\nZD5whPjE3Ar47m+O4a1JMxn1Sh+75XUFMkkVr97ABa11D6VUdyDI8lUfKKu1fkgpFQh0sUeYW2rW\nx49l1Hvv42f2NWrWsdSsHzhIzwEDrWrLHVSzbvWyRt4xlrzmfHkP0XPAIIfXwkMB1fBvDWL0tLmY\nfX2oVD40TyPsyo8m8ltUNMOnzOZfcz/EZCr+ZqgsF2yLkUWqeDUG1gForVdaevnAqLnyU0p9CawG\nbFL2Flq2LHHxCTnXL8Xlry1vxPKFxuGxaXPmElbBUls+8OWc+3R8sqv9atbL5quFv+u8VrXwT3az\nb816maCcyQngUnwiIVav36xBXZZNfw+AaYuWE1YuhMO/n6RMgD8VQstSp0Y4N2/eJOFyMmUCA255\nfnEr2d0rXjcp4M9Ua52G0WI8H2OKWmiLF2/1YHPWrFsPWGrWy+arLX99aN7a8uaW2vJ3jQO/Rs26\n/WrLWz3Y7A55rWrhN26hRXNLLfy71rXwdq5Zb9yQtZu2G5l/P0lomSDMPqVztg8Y/T7xSZdJy8hg\nw/Y9tGxUn92HjrL42x8BiEtMIi09gyB/P5vky8qy/Ze9ySRVvHYB7YGvlVKPARUBlFKNgbpa66VK\nqR3AJlu8eOOG9Ymoo+je9yVMJjfeeXsY3/3wb6O23LpmHYza8uya9awsur7Qz6hZnzDeFtEKyVub\n7n1ftuQdmi/v4/Qd/KZRWf6iVS18ViZdX+hvyfuO3fICNI5QRNSsTvc3x+DmZmLcoH58t3YDfr4+\ndGrVjG6PdqDfyIlGNXz3pwgK8Kf7/3
Vm9PS59Bw6joxr1xg7uJ9dF9aSTmrWi5FSygtjSqoKXMco\nCw0CIoEVgC/GtDVba/1tYc8VXrVKVvThfbYNXEzC6zUCIPrwXgcnuXvhdRoA8MfmnxycpGhM4Q0L\nPZAV07C2zX+gKx84JjXrJZXW+hrwwm022+Q3ekK4OlmkhHAhrrhnJIuUEC7EBdco+e2eEMK5ySQl\nhAtxxQ8YyyQlhHBqMkkJ4UJccJCSSUoI4dxkkhLChchbEIS4naxMsjJSHZ3i7mV/LMVHPuTr7GSR\nEsKFuOAgJcekhBDOTSYpIVyIKx6TkklKCOHUZJISwoVkZTo6QfGTSUoI4dRkkhLChcgxKSGEsDOZ\npFxMSatZ/2DWXPYfOYbJZGL0G69Sv47K2bZu01bmfr4cL09PunRsR6+nnwTgh7XrWLjsa9zd3Xi9\nf2/atWxut7xG5nnsP3oMkwlGv15A5i9WGJk7tLXK/AsLl3+Fu7s7r/d7wWaZXXCQurcmKaVUH6XU\n1Hy3rVRKlb7dY2yUI84Wz2tdsx45dhSRU2fkbMuuLV8wcxrLFnzC+k2buXDxEut+3ZRTsx45biRT\nPppji2gF5913kOgz51g1fyaRI4Yy8aNP8uadMYdPP5zI0o+nsX7Ldi5ciiXxcjJzFi1l2dzpzJsy\ngXWbttotb27ms6ya9xGRbw9l4sy5eTN/9DGfTpnA0jlTWb9lR27mxUtZ9sl05k1+j3Wbt9k1c0l3\nz09SWuvujs5QXEpazfr2Pfvo2KalkTe8CslXrpCSmorZ15fEy5fxM5sJDjIamVs80Iitu/dRytuL\nlk0aYfbxwezjw4S3h9g8591nTs6X+f6CM//zTZvlc8XzSd2Li1Q1pdR/gMrADGAcUA9oCUwE0oGL\nQE8gBPgM8MJoeekPeANLgRZANWCV5fJ5rXVZAKXUN8Ac4ATwpeV1PYHeWusoW31jcfHxRNTO3fXI\nrlk3m33z1KyHVazAjj17ada4EarmfXy+fCW9n3uWUzFncmrLy5YJtlXMHLHxCUSomrl5AwOJjU/E\n7OtLcGAgqWnpRMecJaxCOXbs3U+zRg0BSL96lVffHkfylRQG932eFk0a2TxrTuaExHyZA6wyB+TN\nvO8AzRoZrTTpGVd5dcQ7JF+5wuAX7Zu5pLsXF6laGE3D/sABjMUHYDAwTGu9SSn1D6AMMAGYprX+\nWSnVBRirtR6glPoJ6As8Aryhtb6ulLrlhYAKwHta6/VKqb7AQGCYLb85ayWvZj1f3tHDGf3BNMxm\nXypVKJ+zPelyMnPeH8+5ixfp/dpwfvl2qU0qy+8uc+5lk8nEpFFvMXrSNMy+2ZmNbUnJycyJfMfI\n/Po/+eWbL21Ts+56g9Q9uUht1lpfB+KVUslAFcvtXwPzlFLLgBVa6wtKqZaAUkqNAdyBWMt9PwC2\nAAe01lsKea0LwCyl1LsY/Xt7bPD95Ch5NetliM2TN56QsrkTXLNGDVj2yXQj77zPCKtQjoyr12hU\nvy4eHu5UCauIr48PCUlJlAmyY+aExMIzf5ydeRFh5cuRcfUqjepZZy5NQtJlylh2C0Xh7qkD5xb5\n/63JAtBafwk8DMQBPyilagPXgG5a63Za6zZa639YHuMDmIByt3kNT8t/3wPWaK0fAt4txu+hQCWu\nZr3ZA6zdYJQ5H9G/E1q2DGYfn5ztA4aNIj7RqFnfsGU7LZs0pnWzB9ixZz+ZmZkkXk4mLT2doAD7\nnW6lVdPGhWd+azTxlir1DVu307JJIyPzXuvMGQQF+NskX1ZWls2/7O1enKRaKKXcgWCMRuEEAKXU\nWGCO1vpTpVQoUBfYATwFzFVKtQfKa62XY0xS7wCPKqWe1VqvArKUUtl/W7MPOJQFopRSJuBJjGnM\nZkpczXr9CCJULbq/8iZuJhPjhg7mu/+sNSrL27am2xNd6DdkpFFZ/nx3ggKNxahzuzY8+/IbAIwZ\nMs
iuleVG5pp0f/VN3ExujBs6yMhs9qXTQ63o9vij9BtqydwrX+ZXLJnfHCg160VwT9WsK6X6YBxH\n8gbuA6ZgHCyvBzwNvA4kWr56A4HAYqA0xsTVBwgFxmutH1VKlQF+xTjo/hbwd+AoxoH2mYAZmApE\nA7OBT4EXgeXZB9lvp8TVrGdl8seewvZ8nUu1B1oB8MfuzQ5OUjSm0PBCD2QdCq9m8x/o+tF/2PUA\n4D21SJUkskjZlixSf569F6l7cXdPCJflikOH7BgLIZyaTFJCuBAXHKRkkhJCODeZpIRwITJJCSGE\nnckkJYQLycp0vVFKJikhhFOTSUoIF+KCg5QsUqKYmNww+djvg75/2Y0bAGSdOe7gIEVjCg13dAS7\nk0VKCBci7zgXQgg7k0lKCBfienOUTFJCCCcnk5QQLkQmKSGEsDOZpIRwIfLbPSGEsDOZpFzM+9Nm\ncuDwYUwmE6OGvUmDiLo5237esJG5i5bg5enF/3XuSK9nu5KZmck7H0zh96iTeHp4Mn7UcGqEhzsk\n+/ETUQwcOpw+PZ6jV/duebZt3bGT6XPm4u7mxkOtWzJoQD+HZAT44ItvOXAiGhMmRvV+mvo1quZs\nW7f7IPNWr8HL04MuLRrT85G2AByPOcfgqQvo3aVdzm224HpzlExSd00p1UcpNfU225YopR4r4Pau\ntk+Wa+eefZyKiWHV4gVEjh1F5NQZOdsyMzOZ8OF0FsycxrIFn7B+02YuXLzEul83cSUllZWLPiVy\n3EimfDTHnpFzpKWnM2HKVFo0bVLg9olTpjH7w0msWLyALdt2cOLkSTsnNOw8+junLsSy8r1hTHy5\nB5Gff5OzLTMzk4mLv2b+26/w5bg3WL/3MBfiE0nLuErkkm94sF4th2Qu6WSRsq0R9nyxbbt207Hd\nQwDUqBbO5eQrpKSkApCYlIS/2UxwUBBubm482LQJW3fuIvp0DA0i6gBQpVIlzp2/wM2bN2/3Ejbj\n5enJglkzCA0JuWVbzJmzBAT4U6F8Odzc3GjbuiXbdu62e0aA7UeO06GJUZ1eI6w8yalppKSlA5B4\nJRU/39IE+/sZf8YRim2HNV6eHsx/+xVCg2z/saFMO3zZm+zuFZFSahZGhdURQAHdLZseV0q9CYRg\n1FZ1ABoqpb4DngFWAJWAncCzWuvyxZ0tLj6eiNq5de/BQUHExsdjNvsSHBREaloa0adjCKtYgR17\n9tKscSNUzfv4fPlKej/3LKdizhBz9hyJSZcpWya4kFcqfh4eHnh4FPzXMTY+nmCrhuLg4GBiYs7Y\nK1oecUnJRFSrnJvFz0zs5SuYfUoT7G8mNf0q0ecvERZShp1Hj9O0bk083N3xcLdp5aJLk0WqaKpZ\nvpoAEYB151SW1rqjZbdvtNb6aaXU21rrfyil/g8waa1bKKVaYPT72Zz1b3pMJhOTxo9l1Hvv42f2\npVLFCkAWbVu1YO+Bg/QcMBBV8z6qVwt3/t8QOVE+6yQmk4kPXu3FmPnLMPuUJiykjN2zOtEfTbGR\nRapoGgH/1VpnAoeUUtFW29Zb/rsTmJTvcXWA7QBa621KqQxbhAstW5a4+ISc65fi4ggpWybnerMH\nGrF84VwAps2ZS1iFCgAMGfhyzn06PtmVMsG5U4szCA0pS1xcfM71i5diC9wttEuWoADikpJzrl9K\nvExoYG5lerO6NVk6fggA01d8T8WQMrc8hygaOSZVNCby7pZn3cXl7MdZ32aTgz6tHmzOmnXGWnnk\nmCa0bFnMvr452/u/PpT4hATS0tNZv3EzLZo35djx3xn5biQAG7dup25t5XQV4JUqViQlNZUz585x\n48YN1m/aTKsWzR2SpVWD2qzZsR+AI3/EEBoUgG/pUjnbX5r0CfGXr5CWcZX1ew/Tsp663VPZRJYd\n/mdvMkkVTRTwgFLKBNQGqlptawN8BTwI/Ga5Lfun/RjwPIBS6kHA
Fxto3LA+EXUU3fu+hMnkxjtv\nD+O7H/6Nn9lMp4fb8sxTT9B38BBMwEsvvkBwYCCB/v5kZWXR9YV+eHt7MXXCeFtEu6PDR39j8oxZ\nnD13Hg8Pd9as+4X2bdtQqWJFOrVvx/iRbzNs5FgAunTuSLWqVRySs1Gt6kRUr8xz46bj5mZi7Ivd\nWP3rdsw+penUtCFd27ek/wcfY8LES092IsjfzJGTp5m8dDVnYxOM723HfmYN7U+gufj/Grjg3p7U\nrN8tpVQfoB7GgfE6GMejmgOPAxOAK0A4UBl4Xmt9SCm1DvADWgFfA6HALqCn1rpsYa9X4mrWgeij\nBx2c5O6F16wNwMkfv3BwkqJxa9y50IrzTeUq2fwHus3FM1Kz7oy01kuUUt4Yv5nrrZTyxZiQzmut\n+9zmMR2srj6VfUEp1dOmYcU9yxVHDuc6+ODktNZXgaZKqd0YB8rHaq1vODiWEC5NJqki0lq/VgzP\nUeiunhB/lisWMcgkJYRwajJJCeFCHPEWAVuTSUoI4dRkkhLChTjDHKWUmoHxfsEs4A2t9S6rbZUx\nPsfqBezVWr9yp+eTSUoIUWyUUm2BmlrrFkA/YFa+u0wDpmmtmwE3lVJ3fFeuLFJCuJCsLNt/3UEH\n4P8BaK1/A4KUUv4ASik3jE9mfG/ZPkhrffpOTyi7e6L4mOz6RuS/5qpxDqisrWsdHKSIGnd2dII7\nKQ/ssboea7ktGePTGleAGUqpxsAmrfXIOz2hTFJCuJAsO3wVkSnf5TBgJtAWaGQ5jVEI4pj0AAAg\nAElEQVShZJESQhSncxiTU7aKwHnL5TjglNY6Smt9E1iHcV62QskiJYQLySTL5l93sBboCmDZpTun\ntb4CYPkI2UmlVE3LfR8A9J2eUI5JCSGKjdZ6q1Jqj1JqK8a51wZZziByWWu9GngTWGI5iH4I+OFO\nzymLlBAuxBneJ6W1zl9AcsBq2wmgdVGeT3b3hBBOTSYpIVyIK57DUiYpIYRTk0nKxZT4mvUhb9Gn\n53P06v5Mnm1bt+9k+pxPLDXrrRj0kuNq1idtOsyBC0mYgJEP1aN+ucCcbcsP/sEP+izuJogIDWTk\nQ/UA2HU2jiE/7WFih/tpV62czbK54CAlk1RhlFIblFL18t1WTym14S4fH245i6ddlPia9clTadGs\naYHbJ06Zxuypk1mxZCFbtm/nRJRjatZ3nY3jVFIqK7q1ZkKHhry/8XDOtpRr11m0N4ovn27J0q6t\niUq4woELiZy+nMqSfSdpVMG+hauuQhYpF1Lia9ZnzyA05NaTlt5Ss96qFdt27irgWWxve0wcHaob\n71WsEexHcsY1Uq5dB8DTzQ1PdzfSrt/kRmYmGTduEuDtSYiPN7O6NMXPy/Y7LlJp5cKUUu7Ap0B1\nwBMYZ7WtEkbby1Wsfp2qlPoHMAy4AezWWg+zvCfkUYx32o6w3O9RoIfWOrvWagHwg9b6++L8Hly2\nZj0unuCg3F2q4OAgYs6ctVe0POLSrlI3NDdLUGlv4lKvYvbyxNvDnYHNatH583WU8nDn0ZoVCQ8y\nOySnK5FFKlcPjOaXfkqpssAvQHYd8OvASq31TKXU20BDpZQZGAO00FpfVUp9pZRqZbl/FaAlub18\na4GZSqlSwDWMiqtBtv6GpGbdHnKzpFy7zqe7f+en5x/G18uTvqu3ciz2MrVDAuyWxhXPcS6LVK6W\nQBulVPYbzUpjnJgLoC7GJAWwAWNSisBYjNYopQACyF2Udmmtsyy3o7W+qZT6EeiC8TmmTVrra8X9\nDbhszXpovpr12NgCdwvtIcS3FHFpGTnXL6VeJcTXaDCOSkihsr8vQaW9AWhcsQxH7LxIuSI5JpXr\nGhCptW5n+appuQ3y1qu7Wd1/j9X9G2mtl1tty+8LoBvwBLC8gO1/2T1Ts77RgTXrVUJYe8L4vOzR\nS0mE+nrjaznWFObvQ1TiFTJu
GMf0jlxKomqATcqqb8sJz4Lwl8kklWsH8CSwQikVivEZo2waaIJx\nnpyHrW6ro5QK1VpfUkq9i3FMq0Ba6/1KqTCMFuNRtvgGSnzN+vSZlpp1D9b8bKlZD6tIp/YPM37U\n2wwbMQaALo90olrVqnd4RttoVCGYiJBAeny9GTcTjGlbn9W/xeDn5UHHGhXo27gGfb7bioebifsr\nBNMkrAy//nGRRfuiOJmYwpHYyyw9cJKFT7VwSP6SSGrWLZRSHsA8jF07d2A88DYwGONEXV8BScBB\noKnWup3lwPkojAPq+4DXgN5APa31W0qpcOAbrXUTy2uMAfy01m/fKU+JrFn/7ZCDk9y98MrGWWuj\n3h3g4CRF4z54aqFnFvy+TAWb/0A/EX9eatYdwXIaif75bv7J6vIt+xda6++A7/LdvMRqezTGBIZS\nygS0A+544nkhRC7nOvjgoiwT1W7gf5ZPgQthE/I+KfGnWCaqBxydQ4iSSBYpIVyIKx5ilt09IYRT\nk0lKCBeSeee7lDgySQkhnJpMUkK4EBc8JCWTlBDCuckk5cyc7DN0hcoCrl91dIq75+MHgFu3gQ4O\nUrxc8RMkJeinQAhxL5JJSggX4npzlExSQggnJ5OUEC5EJikhhLAzmaSEcCEySQkhhJ3JJCWEC8l0\nwfdJySLlYt6f+hEHDllq1ocPubVmfeFivLw8+b/OnejVvZtRsx452ahZ9/Rg/Ki3qVEt3H55p8/i\nwOGjllr412lQt05u3l83MXfRF0beTh3o9czTRt5JU/k96g8j74i3qBFu3/OdfzB7PvuPHsOEidGv\nv0z9Orldh+s2bWPulyvw8vSkS/u29Hr6CQB+WPsLC1d8g7u7O6/3e552LZrZNXNJJosUoJTyBDYD\nx7TWvYvpOcOxOr+5Pezcs5dTp2NY9flCok7+wah3I1n1+ULAUrM+eSqrl39OYEAAAwYPoePDbTl0\n5ChXUlJYuWQBp2POEPnhDObPmmafvHv3cSrmDKsWzSPqj2hGTZjEqkXzcvN++BGrv1xo5H3jLTq2\nbcOho8eMWvjP5nL6zFkip81k/owpdskLsHP/QaLPnGPV3BlERZ9m1OQZrJo7IzfzR5/w3WezCfT3\nZ8DwsXRs0wJvb2/mLFnOtwtnkZaWwezFX9pskXK9OUoWqWwVAO/iWqAcZdvO3XR82FKzXr0al68Y\nNetms69Rs+7nR3CQ0an3YLMmbN2xk/iERBrUM6atKpUrce6CUbPu7u5u+7y79tCxbRsjb7XwfHkv\n4+9nzs3b9AG27tpt5M2phQ/j3IWLdssLsH3Pfjq2MZpeaoRXIflKCimpqZh9fUm8nIyf2UxwoNFw\n3OKB+9m6Zz+lvLxo2eR+zD4+mH18mDD8DbtkdRWySBlmADWUUosBPyAI48/mNa31QaVUFLAA6Aqc\nwKi26gb8rrXuqZRqCHwMXMc4pU836ydXSrUB3rdsjwEG2KIcNC4unog6tXOuBwcG5q1ZT00l+vRp\nwipUZMfuPTRr0tioWV+2kt49uhs162fOkpiURNkyZQp5pWLKG5+QtxY+T97AfLXw+yy18DX4fMVX\n9O7ejVNnztq9Fj42IZGIWjWtMgcQm5CI2deX4MAAUtPTiI45S1iFcuzYd5Bm99cHID3jKq+OGE9y\nSgqDX+xJiwca2SSfK55PShYpwzDgG+AkRtX6QqVUXWAm0Amj4movMBk4DXyrtW6mlDqtlArE6NJ7\nTWu9Tyn1HtAT+MHq+WcBHbTWCUqpKRiL2DJbf1PWJ803mUxMem8co8ZH4mc2UymsImRB21Yt2bv/\nID37v2JVs27rZHeZ951RjJowKW8tfMsH2XvgED1fHoy6rwbVw6s69EO1t1TZjxrG6MkzMPv6UqlC\nuZw/y6TkZOZMHMe5ixfp/cYIfvn6c0ym4m+Gkpp119cSCFFK9bJc97HattNSnX4Ro2MP4BJGvf
pF\nYLJSygeoiNUCpJQqB9QEvrPUrvsCcbYIHxqSt478Umz+mvXGLF80H4Bpsz8hrKKlZn1QbstWxyee\ntlvN+i218LFxhJTNrU9v1rgRyxd8bOT9eB5hFcobeV/N7crr+Pdn7VoLH1qmDLEJiTnXL8UlEGI1\nxTW7vwHL5kwFYNr8xYRVCCXj6jUa1auLh4c7VcIq4utTmoSky5QJCrRb7pJM3ieV1zWMiSi7Ot36\n6OaN21w2YUxcM7XWbYH5BTznWavnbKq1tsmR3lYtmrNm3S8AHPntGKEh+WrWB7+Zt2a9maVmffxE\nADZu2WbXmvVWDzZlzS8bjLzHtCVv7r8L/d94i/iERCPvpq20aNaEY8dPMHLCB0bebTuoq2rZtRa+\nVdPGrP11s5FZnyC0bDBmn9zMA4aPJT4xibT0DDZs3UHLBxrRumljduw9QGZmJomXk0lLzyAowN8m\n+TLJsvmXvckkldcO4Clgm2V3729a6+l38biyQJRSyhvoAmzP3qC1TlRKoZSqq7U+qpR6DfhVa32w\nuMM3btiAiDq16d5nACY3E++MGM533/9o1Ky3b8czf3+SvgPfwGQyGTXrQYEEBviTlZVJ1+f74u3l\nxdTId4s71u3zNqhPRG1F936vGnmHD+W7H/+Dn6+ZTg8/xDNPPU7f14Yaefv0yq2Fz8yka5+XjLwT\nxtotL0Dj+nWJqHUf3V8dipubiXFDBvHdT//Dz9eHTg+1ottjf6PfsNGYTPBSz2cICgwAoHPb1jz7\nyhAAxrzxql0X1pJOatbJfbsA8DBGA3EoxnGo17XWu5VS0RjV6SlKqd1AV611dPZloDPwBhAFLAbm\nYCxWn2mtmyilWgPTMKaqc8ALWutCzxAXXrVKVvTRA8X9rdpEeN2GkAXRh3Y7OspdC6/XGIA/dm90\ncJKiMZWrXuiBrEX+ITb/ge6bHGvXmnVZpJyULFK2JYvUn2fvRUp294RwIa44csiOsRDCqckkJYQL\nccX3SckkJYRwajJJCeFCHPE+JluTSUoI4dRkkhLChcgxKSGEsDOZpETxMAHuJemvk2XkuJbh2BjF\nzBVP1SKTlBDCqZWkf/qEEHcgx6SEEMLOZJISwoXI+6SEEMLOZJISwoXIMSkhhLAzmaSEcCGu+D4p\nWaRcTImrWZ/2EQcOHcFkglFvFZD3syV4eXryf490pNezlrzvT+H3qCg8PT0ZP/Kfds0L8MEni9j/\nm8ZkMjF6YD/q187t4Vu3ZQdzl32Dl6cHXR5uQ6+nuuRsy7h6lcf7v8GrvZ7hH4+0t2vmkkx292xI\nKbVEKfWYvV7PumY9ctwoIqfkdkhk16wvmD2dZQvnsX7jZi5cvMS6DRtzatYjx41myozZ9oqbm9fy\n2pEfzsibd8o0FsyaxrKFc1m/cUvevIsXEDl2FFM+sl9egJ0HDhN99hyrZk8mctggJn68MG/mOQv4\n9P0xLJ0Ryfptu7gQm9teNnfZ1wT4+dk0X2aW7b/sTRYpF3K7mnUgT826m5tbTs169OmYAmvW7Za3\nXVsjb7VwLicn581rNufLu4vomDM501aVypU4d95+eQG27ztIx1bNjcxVK5OckkpKapqR+XIyfpYm\nYzc3N1o0bsDWvUYp0MnTZ4g6dYa2zR+wW1ZXIYvUXVJK9VFKLVZK/aCUOqmUek4p9b1S6oRSqrlS\narpSarNSardSqn++x7orpT5TSq233Mcms35cXDxBQblFmdm15UCemvXr12+wY/ce4hISqFWzBpu3\n7eDmzZucjD6VU7NuD3HxCQRZFWQGBwXlzWupWc+T9758eS016/YSm5BEsFVnXnCAP7GJRlmoUbOe\nTvSZc1y/cYMd+w8Rn2j8WU6et4QRr7xo83zSuydqAm2A/sBIoBHQB3gROKq1HqqUKo1RbbXQ6nE9\nMOrb+ymlygK/AA1sHbbE1aznryx/dyyj3o201KxXhKws2r
Zqwd4DB+k54FXUfdl5nahm/Z+vM3rq\nHMy+PlQqX46srCz+39r13F9XUalCOYflLMlkkSqa3Zaq9fPAQa31TUvtujcQrJTaitGtF5LvcS2B\nNpb+PYDSSikvrfW14gxX4mrWQ8oSF2+VN66AvJ/NuzXvwJet8na1c816MLGJuZPmpfhEQoKtatYb\n1mPZR+8DMG3hl4SVD+XnzTuIOX+RDdt3cyEuHi9PD8qXLUPLBxoWez5X/O2e7O4Vze2q1sOB9kBb\nrXU7IH/x5zUg0qpqvWZxL1BQEmvWm7Fm3XpLXk1o2Xx5XxuSm3fTlty871rybt1G3dp2rllvcj9r\nN24zMv8eRWiZIMw+pXO2Dxj5Xm7N+vZdtGzckBlj3+KbTz5k1ZzJdH20I6/2esYmC5SrkkmqeDQB\nvtdaX1dKPQG4K6W8rLbvAJ4EViilQoE3tdajijtEiatZb9iAiNq16f7iAEwmN94Z8Rbfff9v/My+\nuXkHvWlUlvexypuZRdcX+uLt5c3UiePtlhegcURtImpWp/vrI3AzmRj3+kt8t+YXo2a99YN069KJ\nfiPexYSJl557miCr41f24IrvOJcG47uklOqDUbX+luVtBV211n0sl/sBYUA68P8wdu+SMaravwH+\nC8wD6lpuG6+1/qmw1ytxDcZA9JH9Dk5y98Lr1Afgj63/c3CSojFVrltoe3BkqWCb/0CPzkiQBmNn\npLVeYnX5R+DH/JetzOBW/Qu4TYhiJcekhBDCzmSSEsKFZLrg4RuZpIQQTk0mKSFciByTEkIIO5NJ\nSggX4orvk5JJSgjh1GSSEsKFuOIxKVmkRPFxc3d0grt33fjoZObxPQ4OUjTuleve+U4uRhYpIVyI\nK37MTY5JCSGcmkxSQrgQVzwmJZOUEMKpySQlhAuR90kJIYSdySQlhAuRY1JCCGFnMkm5sOMnohg4\n9J/06dGdXt275dm2dcdOps+Zh7ubGw+1bsmgAX0dlDLX8RNRDBzyFn16Pkev7s/k2bZ1+06mz/nE\nkrcVg17q56CUMOmbtRz44ywmTIzs1pn64RVzti3/dTc/7DyEu5uJiCoVGdmtM5eSrjBm6Q9cu36T\nm1mZjOjamYgqFWySTc4nJfJQSpmVUtFFfEw7pdTvSqlud773n5eWns6EKdNo0bRJgdsnTpnO7A8/\nYMXiT9mybQcnTv5hyzh3lJaezoTJU2nRrGmB2ydOmcbsqZNZsWQhW7Zv50TUSTsnNOw6fopTlxJY\nMfxFJvR6jPe/XpOzLSX9Kov+t40vh/Zm6bA+RJ2P5cAfZ1iybgcdGiqWDHmeoU+2Z+b3622WL9MO\nX/Ymi5T9PQR8rLX+2pYv4uXpyYJZ0wkNKXvLtpgzZwkI8KdC+XK4ubnRtnVLtu3cZcs4d+Tl6cmC\n2TPuLm+rVg7Lu13/QYeGCoAaFcqSnJZBSrrRYObp4Y6nhztpV69x42YmGdevE+BTmiBzaZJS0wFI\nTssg0NfHIdlLKtndKyKllD/wLVAK2Gy5rQ3wPnAdiAEGYPyj8zlQCfAFxgOngL7AdaXUea31Klvl\n9PDwwMOj4P97Y+PjCbauYw8OIibmrK2i3JVC88bFE2xdxx4cRMwZx+SNS06lrtWuWpDZh7jkFMyl\nvfH29GBglzZ0HjeHUp6ePNqkLuHlytC7fXOenbKI73ccIiXjKkuH9bZZPnkLggDoBRzWWrcBsjuc\nZgFPaq3bAxeBbkAwsFZr3RZ4BnhXa30IWALMtOUCVWQl7TiGU+XNzZKSfpVP12zhp/EDWTthMIei\nz3LszEUW/byNRxrX5d/vvMq7Pf6PD7/92YF5Sx5ZpIquLrDVcnkDUA6oCXynlNoAPIzRwZcINFVK\nbcGYqMrc8kwOkr+O/eKl2AJ3s5xFaGi+vLGOyxsSYCYuOSXn+qWkFEICzABEXYijcpkggsw+eHm4\n07hGFY6cPs/eqDO0ia
gBQMva1Th8+rzN8skxKQFgIvf/KzeMCvWzVhXqTbXWU4AeGNNUG+Dvjola\nsEoVK5KSmsqZc+e4ceMG6zdtoVWL5o6OdVu35N242WF5W9Wpztp9xwA4evo8oYFmfEt5AxBWJoCo\ni3FkXLsOwJHT56kaGkyVkCAO/mHsnh46dY6qocEOyV5SyTGpotMYterfYkxNiQBKqbpa66NKqdeA\nX4GywB9a60yl1D8Ar9s9oS0cPnqMyTNmcvbceTw8PFiz7hfat21DpYoV6dS+HeNH/pNhI8cB0KVz\nR6pVrWLPeAXk/Y3J063y/mzJG1aRTu0fZvyotxk2YoyR95FOVKta1SE5G9WoTESVCvT4cAlubjDm\n2UdZve0AfqW96Xh/bfp2fJA+Hy3Fw93E/dUq0+S+KlQNCWLM0h/5796jAIzq9ojN8rniWxCkZr2I\nlFKBwGqMaWoz8ALwPDANY6o6Z7mtAvA9EAssAt7AaDp2A+K01nMKe50SWbP+2yEHJ7l74dXvAyBq\n2XQHJyka9w7PF1pxPtDkb/Mf6E+ykgvNoJSaATyIccDuDa31Lqtt0Ri/XLppuamn1rrQ34LIJFVE\nWuskjAkq2zuW/+bf/4gGGlhdX2bDWEIAjv9YjFKqLVBTa91CKVUH4x/oFvnu9qjWOuXWRxdMjkkJ\nIYpTB+D/AWitfwOCLG/b+dNkkhLChcy7w66YHZQHrE8cH2u5LdnqtnlKqXCMwyUjtdaF7qLKJCWE\nsKX8i+Y4YCjQDqgHPH2nJ5BJSghRnM5hTE7ZKgI5bwzTWn+RfVkp9R+gPvBNYU8ok5QQojitBboC\nKKUaA+e01lcs1wOUUmuUUtlvx2kLHL7TE8okJYQoNlrrrUqpPUqprRi/bByklOoDXNZar7ZMT9uV\nUunAPu4wRYEsUkKIYqa1HpHvpgNW22YCM4vyfLK7J4RwajJJOTVH/zbZhZmMf59NvoF3uKNwNJmk\nhBBOTRYpIYRTk0VKCOHUZJESQjg1WaSEEE5NFikhhFOTRUoI4dRkkRJCODV5M6cLM2rWh9Onx3O3\nqVmfa1Wz7rja8mwlpWb9g2X/4kDUaUwmGNXzSepXzz0//LKft/DD1r1GzXq1yozq+SRpV68y8tOV\nxCenUNrbi/f7P0tI4F86D9w9RSapu6SU6qOUmprvtmhL1foIpVT+U6Te7nmmWj5waVNGzfrUQmrW\npzH7w0msWLzAUrPumNrybCWlZn3nsShOXYxj5bjXmNjvGSKX/itnW0p6Bot+2sDS0QNZNmYwUWcv\nsv/EKb5av4PKoWVYOnoQLz/egdnfrSnkFUR+skgVA631JK31NkfnsGbUrM8gNCTklm0F16zvdkDK\nXCWmZv3o73RoXA+AGhXLkZyWRkp6BgCe7u54uruTlnGNGzdvknHtGgFmH05djKWBZdpqoqqz9/c/\nHJK9pJLdvaKpZjnVRGVgRvaNSqklGKecKAu0BkIABXyotf5MKdULeBs4A6RzF+fQ+auKVrMeTEzM\nGVtHKlSJqVlPukJEeKXcLH5mYi9fwVy6FN5engx6qjOdh7+Pt6cnXR68n2rlQ6hVqQK/HjxG56YN\n2HksinNxiQ7JXlLJIlU0tYDGgD/G6SduFnCf+kBLjFbjlUqpRcD7GF19ieQ9/7NzKGm1Zk6U17oS\nLiU9g/k/rOOnySPwLe3Ni5Pmcez0OZ5u2wwdc56eE+fQtHYNgv3NDkxc8sgiVTSbtdbXgXilVDJQ\nUKPmNq31TaXUGSAAo179itb6EoCldt2hCq5Zv3W30Fk4U816aJA/cZev5Fy/lJRMaIAfAFHnLlI5\nNJggP18AHqhVnSPRZ6hdpSLj+xin8k7NuMq6vTYfpF2KHJMqmvz/hBf0T/oNq8sm8taygxP8md9a\ns+642vK74VQ16/UUa3YdBOBI9BlCA/3xLV0KgLCywZw8dymnZv1wdAxVy5Xl1wO/MfPb
/wLww9Y9\nPNSgtkOyl1QySRVNC6WUOxAM+AIJd/GYeCDA0nycCrQCbH6Q/fDR35g8Y5altty9gJr1txk2ciwg\nNetF0ahmOBHhlXhuwmzcTCbGvvAPVm/ahbl0KTo1qU/fLu3oPWkuHm5u3F8znCaqOhnXrrN83Rae\nfW8WAb4+THu1p0Oyl1RSs36XLG8beATwBu4DpgATMWp55pB74Lye1votpZQZOKy1DldK9cWoWY/G\nOHD+X631ksJez6hZP2ibb6aYhdc1ippLVM16jVoAnFy90MFJisbtwcfvuTMhyiLlpGSRsi1ZpEoO\nhx8fEUKIwsgiJYRwarJICSGcmixSQginJouUEMKpySIlhHBqskgJIZyaLFJCCKcmi5QQwqnJZ/fE\nvemm8SHgrHOOPSOpuDOZpIQQTk0WKSGEU5NFSgjh1GSREkI4NVmkhBBOTRYpIYRTk0VKCOHUZJES\nQjg1eTOnCzt+IoqBQ4fTp8dz9OreLc+2rTt2Mn3OXNzd3HiodUsGDejnoJS5jp+IYuCQt+jT8zl6\ndX8mz7at23cyfc4nlrytGPSS4/JO+nEzB05fxGSCkY+1pn7lcjnblm87xA/7juPuZiIiLJSRj7dm\n9Z5jzP7fDioHBwDQomYlXnm4iaPilziySFkopcoD72qtX1ZKRWNVsKC1/rEIz3M/8Het9Tu32T4e\niNNaz/nLoQuRlp7OhClTadG04B+GiVOm8dnHsygXGkKv/q/wSIeHua96dVtGKlRaejoTJk+lRbOm\nBW6fOGUan32SnfdlI28N++fddfIsp+Ius2Lg00RdSmDMN+tZMdDo1EvJuMaijfv571s98XB3o/9n\n33Pg9AUA/tbgPv7ZpZXd87oC2d2z0Fpf0Fq/XAzPs/92C5Q9eXl6smDWjAJLP2POnCUgwJ8K5cvh\n5uZG29Yt2bZztwNS5vLy9GTB7BkFln7ekrdVK7bt3OWAlLA96iwdIqoBUCM0mOSMq6RkXAPA090N\nT3c30q5d58bNTDKu3yDA0skn/rx7apKy1FK1xaieigBGA88BdYGewMda61tGD0vX3qdAdcATGKe1\n/kUptQH4H9De8pyPW+4zWGvdVSk1DOiK8Y/Bf7TW79r0G7Ti4eGBh0fB//fGxscTHBSUcz04OJiY\nmDP2ilagQvPGxRMcFJhzPTg4iJgzZ+0VLY+4K2nUDctd+IN8SxN3JQ1zKS+8PT0Y2KEpnT9cSilP\nDx5tcB/hIYHsO32B3SfP8dKiH7iemcnwLi2pW9F5G6OdzT21SFnUBNoA/YGRQCOgj+Xy7fQAzmut\n+ymlygK/AA0s25K11h2UUpOAfwD78z22NUaD8Uml1Ixi+y6KU0mrNXOmvFZZUjKu8emGPfw0rAe+\n3l70Xfgvjp2Po2HlcgT7lqJt7XD2n7rAyK/W8a83uzswdMlyL+7u7dZaZwHngYNa65vARSCgkMe0\nBJ6yTE7fAKWVUl6WbZss/z1TwHOkAb8C6zEmreBi+Q7+otCQssTFxedcv3gptsDdQmcRGpovb2xs\ngbuF9hDi70PclbSc65eS0wjx9wEg6lIilYP9CfItjZeHO43DK3DkbCzVQ4NoWzscgPurlichNZ2b\nmZmOiF8i3YuL1I3bXD5VyGOuAZFa63aWr5pa62sFPEdOcaNSqiowFPib1rrdHZ7fripVrEhKaipn\nzp3jxo0brN+0mVYtmjs61m3dknej4/K2qlmZtYejADh6NpZQfx98vY1/r8KC/Ii6lEjGdeOvxJGz\nsVQtE8Bnv+7j3/t/B+D3C/EE+5bG3e1e/NH7c+7F3b0/YwfwJLBCKRUKvKm1HnWHx5QFLmmtU5RS\njYGqgNcdHlNsDh/9jckzZnH23Hk8PNxZs+4X2rdtQ6WKFenUvh3jR77NsJFjAejSuSPVqlaxV7Tb\n550+05LXgzU/W/KGVaRT+4cZP+ptho0YY+R9pBPV
qlZ1SM5GVSsQERZCj7nf4jEpG38AACAASURB\nVGYyMebJh1i95xh+pbzoGFGdvg81os+Cf+HhZuL+quVpUq0ilYL9GfHVz6zaeYSbmZlMePphh2Qv\nqWSRujtfAe2VUlsBd2D8XTxmP5CilNoCbAbmA59YLttcvbp1+HLB3Ntub/pAI1Z9/pk9otyVenXr\n8OXCebfd3vSBxqz6YpEdE93e0L+1yHO9doXcXc9nm0fwbPOIPNvLB5hZMuApu2RzRaYsZzoIKXKE\nV62SFX30oKNj3JXwusbvEKJ/O+TgJHcvPNx4G0HUPIe/W6RI3P/xhunO93ItsmMshHBqskgJIZya\nLFJCCKcmi5QQwqnJIiWEcGqySAkhnJosUkIIpyaLlBDCqck7zsW9KfsDvpeTHJtD3JFMUkIIpyaL\nlBDCqckiJYRwarJICSGcmixSQginJouUEMKpySIlhHBqskgJIZyaLFIu7PiJKDo+8Q+Wrvz6lm1b\nd+yk6/Mv8mzvfny8wDlOI3z8RBQdH/87S1d+dcu2rdt30rVXH559oS8ff+rYvJN+3sNzX6yhx5dr\nOXQ+Ps+25XuO89wXa+i19H988PMeAOJTM3jpq/X0Wf4zPb9cy4FzcY6IXWLJIuWi7qZmffaHk1ix\neAFbtu3gxMmTdk6Y193UrM+eOpkVSxayZft2TkQ5Ju+u0xc5lXiFFS88woRHm/P+/3Kbn1OuXmfR\njt/4slcnlvbqRFTcZQ6cjeOHI3/wREQ1lvToyJttGzJ7Y8k4LbSzkEXKRUnNum1sj75Ih1qVAKhR\nNoDkjOukXL0OWNes3+BGZiYZN24SUNqLPs3q8FhEOADnk9Mo5+fjkOwl1T312T2l1A6gh9Y6SilV\nCfgXcBCjGt0boz59rVIqGqhnqaOaChy2PEVrIARQwIda68+UUs8D/wRigDiMduMvuX0t+2EArfVg\nW36vUrNuG3Gp6dQtn9vxGuTjTVxqOmZvT7w93BnYuh6d531PKQ93Hq1TlfBgfwBiU9IZ9M2vpF67\nweLn2jske0l1r01SXwLPWi4/gbFIZWit22JUpM+5w+PrW+73FPCaUsoN+ADoCHTDqG+H3Fr2hy33\n/cjqOQ7beoEqspLWGOSkeVOuXufTbUf56aXHWPvqExw6H8+xi4kAhJhL81Wfv/HPDo0Y9e/tDk5a\nstxri9QKjEUG4DGgMrABQGt9DriqlCqsCn2bpZY9u1K9LJCstb6otU4F1lnuV1gt+87i+3b+HKlZ\n//NCzKWJS03PuX7pShohvqUBiIq/TOUAX4J8SuHl7k7jSiEcuZDArtMXuZxhFF63rRHGUcvCJe7O\nPbVIaa3jgTNKqaYY33saVtXoGA3DmYD1P9WeVpfzV6qbLPfPlv24wmrZr+FgUrP+57WqVoG1OgaA\noxcSCPXzwdfb+CsSFuBLVHxybs36hQSqBvvxPx3Dvw4ZB/qPX0qivByTKpJ76piUxZfAxxjHjDKA\nh4GVSqnKQKbWOkkplQxUUEqdBB4E9t3mueKBMkqpIMtztQO28Odq2YuV1KzbRqNKIUSUD6bHl2uN\nmvVOTVh98CR+3p50VJXp27wOfVasw8PNjfvDytKkcig1yvgz8t/b+Z+O4drNTN55pODfYIqC3XMN\nxpbdrgsYB7VTgHlADYwpaqTWeqNSagAwDNAYC9FGy8Praa3fUkqZMY4thSulBgIDgd8xFqr/YOxW\nzgPqYqll11r/ZNn9G6y1zj4Qf1vSYGxb4VWMRS7qwyEOTlI07i++c881GN+Lk1Qr4AetdfYpGfvn\nv4PWegGw4HZPoLVOAcItVy8BD2mtE5RSa4AorfWN2zxvu78WXYh7zz21SCml3gUeAZ4uxqf1AX5R\nSqUC+7XWW4vxuYW4591Ti5TW+h3gnWJ+zi+AL4rzOYUQue6p3+4JIUoeWaSEEE5NFikhhFOTRUoI\n4dRkkRJCODVZ
pIQQTu2eeguCEDm8SgFgatjCwUHEncgkJYRwarJICSGcmixSQginJouUEMKpySIl\nhHBqskgJIZyaLFJCCKcmi5QQwqnJIuXCpGbdNj744lu6j5vGc+OmcyjqVJ5t63YfpNvoD+k5fgbL\n1vyac/vxmHN0fuPdPLeJuyOLlA0ppaIt50O3vu0xpdQSW7+21Kzbxs6jv3PqQiwr3xvGxJd7EPn5\nNznbMjMzmbj4a+a//QpfjnuD9XsPcyE+kbSMq0Qu+YYH69VySOaSThYpFyU167ax/chxOjQxiidq\nhJUnOTWNlDSjhy/xSip+vqUJ9vfDzc2NByMU2w5rvDw9mP/2K4QGBTgkc0knn90rJkopT3Kr1b2B\ncVbb6mOcYjgBiLJHHqlZt424pGQiqlXOzeJnJvbyFcw+pQn2N5OafpXo85cICynDzqPHaVq3Jh7u\n7ni4uzskryuQRar4PIelsl0pVRFLM7LFWIxaq38ppeY6JF1hSlqtmRPltU5iMpn44NVejJm/DLNP\nacJCyjhV1pJKdveKTxPyVbYD2ZXtdYHsFpkN9g6Wn9Ss/4UsQQHEJSXnXL+UeJnQQP+c683q1mTp\n+CHM++cr+PmUpmJIGUfEdCmySBWfLAqubIe8dewO/zOXmvU/r1WD2qzZsR+AI3/EEBoUgG/pUjnb\nX5r0CfGXr5CWcZX1ew/Tsp5ySE5XIrt7xWcX+SrbgewCUo0xaa2x3MfmpGbdNhrVqk5E9co8N246\nbm4mxr7YjdW/bsfsU5pOTRvStX1L+n/wMSZMvPRkJ4L8zRw5eZrJS1dzNjbB+P9ix35mDe1PoNnX\nId9DSXPP1azbilLKg3yV7RgHy+sBtYDFwFngJGDWWvcp7PmkZt22wu8zJpyTP5asykS3xp2lZl38\nObepVg+3/Hcv0NCugYRwEQ4/PiKEEIWRRUoI4dRkkRJCODVZpIQQTk0WKSGEU5NFSgjh1GSREkI4\nNVmkhBBOTd7M6cxMJejNxVlZkJHq6BR3z8MTAFPl2g4OIu5EJikhhFOTRUoI4dRkkRJCODVZpIQQ\nTk0WKSGEU5NFSgjh1GSREkI4NVmkhBBOTd7M6cKOn4hi4JC36NPzOXp1fybPtq3bdzJ9zie4u7nx\nUOtWDHqpn0Myvv/Rxxw4chQTJkYNGUyD/9/efYdHUa9tHP9uGpBCGgm9Cz+lKQiRKiLFcixHBQXh\nIGI5gngUy6sUEQVFFBsCApaDggIWbMdDUQQbBJQO6sMhFOkkISSk0ZL3j5kkCyQYkGRnkudzXVzs\n7uzM3hv04TfDJneTgg9XfvP9j7wxYxZBgYH8rduV9Ot1Ex998RVfLPg6/zkbfxfWfDu/VDOPm/gG\nazf9hsfjYcSDg2l+UUHZwuIflvHGu+8TFBjItd2uoN8tf+fj/8zn8wXf5D9nk2xm9ddflmpmN9Mh\ndZ7Y1ekfA8eB+iLi03694tSWvz1lIlVjY+h39z+5qmsXLmjYoFQzrly9lh07dzH3zckkbN/B8Gdf\nYO6bkwGrsnzMSxP5dMZ0IsIrc8/Dj9Otc0d63fA3et3wt/z95y9eWrqZ16xj+67dzJ020co87iXm\nTptYkPmVScx7e4qV+dHhdOvUgZ7XXUPP667J33/Bt9+Vama309O980xEFvh6QIE7asuX/7Kabp07\nAtCwXl1S0w6TnmF9a03KoVQqh4YSFRlhVZa3bsWyn1edtP/kd95j8MD+pZo5ftUaunVqn5857XB6\nQebUVMJCQ/Izt7u0Jct+WX3S/lNmzGLQgH6lmtntyv1KyhgzAOgMVAGaAiOw2oibAH2BySLS2n7u\nL0BPrPaXsUAWsN9+nvfxmgGTgJlYtertgTeAFsBl9jEnl+T7ckNtedLBgzS9sHFBjsgIEpMPEhpi\n/Y+ekZnJ9p27qFm9GitWrSWu1SX5z13/6+9UrxpLTHRUYYcuMYnJKTQ1Xpkjwk
lMTrEyR0SQkZlV\nkHn1OuJatsh/7obfhGqxMaWe2e3K/ZCyNQI6YbW9DANaAgPs24UZAjwiIj8YY24GiqqpvQT4O1aT\n8SagPlAR+AQo0SF1VhxSa+Zdr+bxeHj+yScY/uwLhIWEUKtGtZNyfvzFV9z0t6t9EfMkp2Ue8Rgj\nxr1EaGgItapXO+lL+9GX87npmqt8kNLd9HTP8ouI5AJ7gfUicgJrhRRexPM/AqYaY4YDa0RkXxHP\nSxCRZPu4B0Rk958ct1Q4pbY8tkoVkpIP5t8/kJRMTHTBvI9rdQkfTJ3ItJfGERYSSs3q1fK3rViz\njpbNm5ZqXoDYKtEknpq5SsHKKK7lxbw/5RWmvTCW0NAQalavmr9t5Zp1tGzepFTzlgU6pCzHi7h9\nqkAAEZmJ1UScBHxpjCnq530UdVyf/gwWp9SWd4hrzcIl3wPWv3jFVokmNCQ4f/vdQx8n+WAKmVlZ\nLPlpGe3aXArA/sQkQipVJCgw0AeZL2XR0h/szP+zMgcXZL7nkeEkp1iZl/4UT/vWrazMSUkEV6rk\nk8xup6d7Z5YGXGCM8QBVsdqJMcY8CUwSkenGmFis61eO4oba8lYtmtH0wsb0vmcIHj8PTz36IPO+\nWkBYSAjdr+jErTf+jYEPPWZVlve/nagIawGamJxMVGRkqecFaNW8KU1NI3rf9yB+Hg+jHn6Aef9d\naGXu3JFeN1zDXUOfwOPxcO8/ehOZlznpINFe1wFV8ZX7mvW8C90i8qgx5jqgp4gMyLsN5ALNgXVY\nF8z/gXWh/V9Aiv3rDmAK1kcQqlBw4fxjEWltjAkFNopIPe/bZ8pVr26dXLfUlte7qDnk5rJ97Qpf\nRym2epdYK8dtq37ycZKz44mp46KfhHh+lPsh5VQ6pEqWDin30GtSSilH0yGllHI0HVJKKUfTIaWU\ncjQdUkopR9MhpZRyNB1SSilH0yGllHI0/bYYdX54PFAxxNcpii/jMAA586b6OMjZ8f/nc76OUOp0\nJaWUcjQdUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhVYZt3pJAt+tvYtac\nD0/btix+JT37DeC2/gOZPP1tH6Q7nVvyPr90HX1mL+H22UvYsO/gSds+WJtAn9lL6DdnKeOWrAPg\neE4Owxb8TL85S+n9wRJW7U7yRWzX0iFVRhWnZv31CeOZPeMtfoqPZ0vC1lJOeDK35P15ZyI7UtKZ\n3acLY3pcynP2IAJIP3KMd37ZzMzbOjOr9xUkHExj3Z5kvvj1DyoFBjCr9xWM6XEpLyxd75PsbqVD\nqoxyQ826N7fkjf/jAF0vqAFAw+jKpGUfJf3IMQAC/f0I9PMj8+hxjufkkH3sBOGVgrj+ojo83tlq\nMo4KDuJQ9lGfZHcr/d69UxhjVgC3i0iCMaYW8DmwHmgAVABGicgiY8x2rJaZdGPMBGCjfYiOQAxg\ngBdF5G1jzD+A/wN2YnX1fSsiM0ryfbihZt2bW/ImZWbTpGpBnVZkcAWSMrMJrRBIhQB/Bre7iB5v\nL6BigD/XXFibepFhJ+3/3uot/O3C2qUd29V0JXW6mcBt9u0bsIZUtoh0Bm7Gqqo6k+b28/4OPGCM\n8QPGAd2AXlh17s7itsYgJ+X1ipJ+5BjTV/7O/IFXsejua9iw9yC/Jx7K3/7B2gR+23+IQW0v8kFQ\n99IhdbrZWEMG4DqgNrAUQET2AEeMMVGF7wrAcrumfRdWnXoVIE1E9otIBrC4pIIXl1Nq1ovLSXlj\nQiqRlJmdf/9ARhYxIRUBSDh4mNrhIURWqkCQvx+talZh035rSH2yYRtLE/by+o3tCPTX/+3Ohn61\nTiEiycAuY0wbrK9PJifXogcBOZz0dyje3dmn1ql77Ofn8fkywCk168XlpLwd6lVl0WbrVPPX/SnE\nhlQiJMj6469ZOZiE5MNkHzsBwKb9KdSNCG
XnoXTmrt/Gaze0pUKAv09yu5lekyrcTGAyMB3IBroA\nc4wxtYEcETlkjEkDqhtjtgJtgTVFHCsZiDbGRNrHugIo8UZKN9SsuzFvyxrRNK0awe2zl+Dn8TCy\n6yV8umk7YUGBdGtUk4FtGjPgo+8J8PNwSY1oWteqwis/buRQ9hHu+7Tgj/3NWzoRpCuqYtEG40IY\nY4KAfVgXy9OBqUBDrFXUMBH53hhzD/AIIFiD6Ht797zKdu9q9cHAYOB/WIPqvyIy80wZXNdgDLgl\nL0C92taQS3husI+TnB3/fz5X7hqMdSVVuA7AlyKSd9Xz7lOfICJvAm8WdQARSQfq2XcPAJeLyEFj\nzEIg4fzGVars0iF1CmPM08BVwC3n8bDBwLfGmAxgrYgsO4/HVqpM0yF1ChF5CnjqPB/zPeC983lM\npcoLvXKnlHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH048gqPIpx/r+OlIOnvl5yud0JaWU\ncjQdUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SZdjmLQl0u/4m\nZs358LRty+JX0rPfAG7rP5DJ09/2QbrTuS0vgKdrL/z+8X/49XsMqp1cDuFp1dna1vdRPF17+Sih\n++mQOkfGmO122YL3Y9cZY2b4KNJJMrOyGDN+Au3i2hS6fewLL/H6hPHMnvEWP8XHsyVhayknPJnb\n8gJQuxGeyBhyZr5AzvyZ+HW/rWBbUEU8l/UgZ9YEct6fgKdKdahR33dZXUyHVBkVFBjIm6+/UmiJ\n5s5duwkPr0z1alXx8/Ojc4cOLF/5sw9SFnBbXgBPvQvJ3bzOupO8DyoGQ5BVFMqJ49avoArg8YOA\nIMjK8FlWN9NvMC4GY0wgVgdfA6ACMMprW3Osn19+EK8WGGPM/cDtWMWgn4nIS8aY0fYx6gNX2E3H\nJSIgIICAgML/eBOTkomKjMi/HxUVyc5du0sqSrG4LS8AIZXJ3fdHwf3MwxBSGY5mw4nj5P74FX73\njYXjx8j97WdIOeC7rC6mK6ni6QNki0hnrAr2SV7bngRGi0hX4ASAMaY+0BPoCFwO3GKMqWM/P0hE\nOpXkgDprbutedGxer0q8oIp42l1NzvRR5LwxAk/1+hBb03fRXEyHVPG0BpYCiMge4AgQZW9rAuRV\nVC21f48DGgFL7F9hFHTwrSzpsH8mNrYKSUnJ+ff3JyYWeprlFI7Nm56KJ6Rywf2wcMhItW5HV4PU\nJOsUL+cEubu24Knm25Zot9IhVTy5nPTXJEFYp3HYj+fdzvt6HgW+EpEr7F/NReR7r20+VatGDdIz\nMti1Zw/Hjx9nyfc/0qHdZb6OVSSn5s3d9ise08q6U7U2HE6Fo0es+6nJ1qAKCATAU60uuQf1dO9c\n6DWp4vkZ6ALMMcbUxhpKee3GgrXSWmg/B2AVMN4YEwxkAa8CT5Rm4I2//sb4l19j9569BAQEsPCb\nb7mycydq1axB9yu7MHr44zzyxEgArr2qO/Xr+vZvebflBWD3VnL37bA+fpCbS87Xs/E0b0fukSzY\nvJbcFV/j12co5OSQu3sr7Nri68Su5Ml17Pm9cxhjAoCpQEOsVdQwrIvlzYDGwL+B3cBWIFREBhhj\nBgMDsa5TfSYi4+wL50kiMun0VzlZvbp1crf/tqEk3s55V++i5gC4JS9AvZq1AEh4vK+Pk5wd/yem\nev78WWWLrqSKQUSOA3ef8nA9+/fVwMWF7DMFmHLKY6NLIJ5SZZpek1JKOZoOKaWUo+mQUko5mg4p\npZSj6ZBSSjmaDimllKPpkFJKOZoOKaWUo+mHOVX5FBIGgN/NA30cRP0ZXUkppRxNh5RSytF0SCml\nHE2HlFLK0XRIKaUcTYeUUsrRdEgppRxNh5RSytH0w5xl2OYtCQwe+igD+vahX+9bT9q2LH4lL0+a\ngr+fH5
d37MD9997lo5QF3JJ33JuzWCsJeDww4p5/0Lxxg/xti+NX8cbczwkKDOTay9vS77ruZGUf\nYdir00k6lMrRY8cYdNvf6RLX0mf53UaH1HlgjLkaq/BzPvCxiLQ+ZXuSiJRqB1NxasvfnjKRqrEx\n9Lv7n1zVtQsXNGxQ6HNLg1vyrtzwG9v37GfuhKdI2Lmb4a+9xdwJTwGQk5PDmGnvMe/VMUSEhXLP\n6Al0a3spq3/bTLNG9bn7luvYfSCJgU+O1yF1FvR07zwQkQUi8oavc3hzW225W/LGr/uVbm0vBaBh\n7ZqkpWeQnpkFQEraYcJCgokKr4yfnx/tLm7CsrUbubZTW+6+5ToA9iUmU61KpE+yu5WupM6B3UY8\nC6sJJgD4BqsAdJLXc64BHgCut+8/A/QAkoHrRSSHEuS22nK35E08dIimF9QryBIeRmLKIUKDKxEV\nXpmMrGy279lHzdgqrFj/G3HNL8p/bu/HnmZ/UgpTRz3sg+TupSupc9MT+FpEugAPYjUa5zPGXIBV\nv97HrlOPwjoNbGvfblHKec/MbbVmDsrrHcXj8fD8Q/cy4rU3GfLsa9SqGoN3ZdycF59iypNDeezl\nqWiVXPHpSurcLAI+NcZEAB8D+4C885QQ4DOgv4jYndukich6+/ZuILw0w57KsbXlRXBS3tioSBJT\nUvPvHziYQozXKi+u+UW8P/5JAF56dy41q1Zh45ZtRIdXpnpMNBc1qMuJEyc4mJpGdIRP/zNwDV1J\nnQMR2YjVtfcDMA6o47W5lv34YK/Hjp9yCJ8WPDq1trwoTsrboWUzFi1bCcCmLduJjYokNLhS/vZ7\nnnqR5EOpZGZns3TlGtpf3IxfNv7Ovz+bD0BSSiqZWUeIrBzmk/xupCupc2CM6Q1sFZHPjDFJwH+B\n6fZmwRpQ3xpjeojIIl9kdFttuVvytrqoMU0b1qf3Y0/j5/Fj1KD+zPvme8JCgunerjW9rrqCu0a9\ngMfj4d5e1xMZHkbva7oyYuJb9H18DNlHj/HkoP74+en6oLi0Zv0cGGNaYdWup2NdPJ+HVcE+Cfsj\nCMaYhsCXwGXAtryPIBhjPgYmicjSM72G1qyXrHqNrQva2779zMdJzo6ncZzWrKs/JyKrgbgiNre2\nn5MANLEfy7+AIiI9SzadUmWLrjmVUo6mQ0op5Wg6pJRSjqZDSinlaDqklFKOpkNKKeVoOqSUUo6m\nQ0op5Wj6YU5VPh2xfgZUTrxPvmvpnPk3LuozxGWXrqSUUo6mQ0op5Wg6pJRSjqZDSinlaDqklFKO\npkNKKeVoOqSUUo6mQ0op5Wj6Yc4yzC215Xnckvf5r39m3e4kPHgY1qM1zWsUNNd88Ivw5cat+Hs8\nNK0ezbAebUjOyGLYF8s4euIEx07k8H/dLuXimjE+y+82upICjDFn/SN9jTF1jDFn/fFfY0wLY0zj\ns93vbBWntvz1CeOZPeMtfoqPZ0vC1pKOdEZuyfvzjv3sOHiY2QOuYcx17XhuUUGTcvqRo7wTv4mZ\n/a9i1h1Xk5CUyrrdiXy5cRs3NG/AjH49eOiKlrz+3TqfZHcrHVKWJ85hnysp+uecn8nNQIkPKbfU\nludxS9747Xvp2rg2AA2rhJOWfZT0I0cBCPT3J9Dfj8yjxzmek0P2seOEV6zAgMuacF2z+gDsTcug\naliwT7K7VZk63TPGDACuBipj9d+9AgzHqpw6ALwLvAMEATnAXVhtxBcbY+aJyM3GmGeBToA/VqvL\nbGNMD2AskAXsB+4HRgPHjDF/AJnAGOAokALcCrQHhtivcxFWieg84D4g0RhzQERWltTXwi215Xnc\nkjcpPYsm1aLz70cGVyQpPZvQCkFUCPBncKcW9Jj8KRUD/LmmaT3qRVcGIDE9i/s/XELG0WP8u293\nn2R3q7K4kmoK3IC10hkLVADmi8izwDPA2yJyBTAFGC0iLwKp9oDqBNQV
kcvt/UcaYyphDZtHRKQz\nMAdrgM0AXhORL4BI4HZ7expwlZ0lDhgAtAMeEJENwAJgWEkOqLPmtlozJ+X1ypJ+5CjTf9rI/EE3\nsmjITWzYncTv+w8CEBNaiQ8HXsv/dWvN8C+X+SqtK5XFIfWdiBwXkSSsVU0VIG8gtAaW2reXAC1P\n2bc90NYYsxRYiPX1qQ58BEw1xgwH1ojIvlP2SwTeMsZ8B3QB8v6qXS0imSKSfr7e3PngpNry4nBS\n3piwYJIysvLvH0jPIibUajBOSEqldkQokcEVCfL3p1WdWDbtPcjPO/aTmnUEgM4X1OTXfQd9kt2t\nyuKQ8n5PHiAX6zQM+3ZeuWLeKZ+3o9grLfvXRSKyVURmYg2fJOBLY8yFp+z3DjDEXkl97vX4qfXq\njuCk2vLicFLeDg2qs+j3HQD8ujeZ2NBKhFQIBKBmeCgJyWlkH7P+2DftTaZuVBhfyx98vsG60L/5\nQArVKus1qbNRpq5J2doZY/yxTsHCgGSvbT9jDZvZQGfgF/vxvMG2AphgjBmPNcReFJEHjDFPYl2f\nmm6MicUq/cyh4OsXDvxhjImwj7/+DPm89ysxbqktd1velrViaVotmttnLMDPAyOvjuPTdQmEVQik\n24V1GNi2CQNmfU2An4dLasXQuk5VGlYJZ9gXy/j69z84euIET13j3L8QnKhM1azbF85vxFoxXQC8\niHVBu5mIpBtjagBvY12nOgrcJSK7jTGLgTARibMvnHfDWnFNEZEZxpg7gH9hnT6mAHcAHbAuxD8G\nGKzrYJuBr7Auqg8HbslrLDbGJIlIFWPMncDTwJ0isrio96I16yWrXt16ACS89Ihvg5wl//4jy13N\nelkcUs1E5FFfZ/mrdEiVLB1S7lEWr0kppcqQMnVNSkRm+DqDUur80pWUUsrRdEgppRxNh5RSytF0\nSCmlHE2HlFLK0XRIKaUcrUx9BEGpYgusAICnWfmrLXcbXUkppRxNh5RSytF0SCmlHE2HlFLK0XRI\nKaUcTYeUUsrRdEgppRxNh5RSytF0SJVhm7ck0O36m5g158PTti2LX0nPfgO4rf9AJk9/2wfpTueW\nvOPe+4Teo16iz6iX2ZCw46Rti39ZT68RL9J39Cu8v/C7/Mc379xDjwefPukxVTzlbkgZY27xdYbS\n4Jba8jxuybvy1/+xY18ic555hLH/vJ1n3/04f1tOTg5j//0R0x6/j5mj2JM/sAAAHqJJREFUHmTJ\n6o3sS04hM/sIz874mLbNSry4ukwqV0PKGFMP6OPrHKXBLbXledySN37TZrq2bgFAw5rVSMvIJD3T\n6uFLOZxBWEgloiqH4efnR9umhuUbhaDAAKY9fh+xkeE+yex25e179yYDccaYp4DmWLVXAVjtwuvt\nBuPngGPATuAeCqlLF5GnjTHdOL1aPReYBdQFlgG3ikgtY0wTYJK9/TAwQEQOleQbdUtteR635E06\nlEbT+rULsoSFkph6mNDgSkRVDiUj6wjb9x6gZkw0K3/dTJsmjQjw9yfA398necuCcrWSwqq4+g5r\n4CwQka7AIOAle/tE4EYRuRLYD/SyHz+pLt1+rLBq9auBiiLSFvgWqGE/93Xgn/brLQLuL6k3eE7c\n1hjkoLzeSTweD+MG9WPktPd54OU3qRkT7aisblXeVlJ52gMxxph+9v1gY0xVoBEwzxgDEILVWLwb\nuy4dwN4GBdXqAUADrKEUC/xkb/8vBQ3GccCb9r4VsEpKfcZJteXF4aS8sZHhJB1Ky79/ICWV2IjK\n+ffjmjRi1uihALw8+wtqxESXesayprytpPIcxTrFy6tTj7Mf2+31WBsRecF+fmF16YVVq3soqG7P\npeAv2kygi33cdiLyrxJ5V8XkpNry4nBS3g4tLmThirUAbNq2k9jIcEIqVczffu/zU0hOPUxm9hGW\nrN5I+2amqEOpYipvK6m8ivMVwN+B
5fb1oqtF5GVjDMaYJiLyqzHmAaxTw6IUVq2eAPS0t/eg4Ou7\nDutUcL4xpjeQeKb24vPBLbXlbsvbsnEDmjaoTZ9RL+Pn5+HJO3vx6XfxhAZXonubi+l5ZXvuHjcZ\nDx7uvbE7kZVD2bT1D8bP+pTdiQcJCPBn4Yq1THz4biJCQ3zyHtymTDUY/xljTAywCvgEqIN1euYP\n/EtEfjHGdMS6PnUU2AP0x7oONaSQuvRnOL1avT1WjXtlYClwr4jEGmMuAqZjDcksrGtZB8+UVRuM\nS1a9C6wVztb/vOfjJGfHr1WPctdgXK6GVEkzxkRhndZ9YoypCSwWkQvP5Vg6pEqWDin3KG+neyXt\nMHCrMeYxrOt9Q32cRynX0yF1HonIMeA2X+dQqiwpr/+6p5RyCR1SSilH0yGllHI0HVJKKUfTIaWU\ncjQdUkopR9OPIKjyKe8jkQGBPo2h/pyupJRSjqZDSinlaDqklFKOpkNKKeVoOqSUUo6mQ0op5Wg6\npJRSjqZDSinlaDqkyjC31JbncUvecTM+pPfw8fQZ8QIbtmw/advin9fS64lx9B35Iu/PX5L/+OY/\ndtNjyMiTHlPFo0PqDIwxlxtjYu3b240xocXcb7sxJtQY84Qxpl3JpiycW2rL87gl78pNm9mxN5E5\nzz3O2EH/4Nl35uZvy8nJYezbc5k2bAgzn3mEJas2FNSsvzOXts3O6SdJl3s6pM5sIFZZwzkRkedF\nZPl5zFNsbqktz+OWvPEbfqdr3MUANKxV/ZSa9XTCgisRFW7XrDc3LF//m1WzPmwIsVFas34uyuX3\n7hljAoF3serQs7GG0WSsQtBgrJbicKzaq6bGmFvsXYcYY67F+rpdZe87HasctAIwSkQWeb3ODOBj\nYOEpr9dfREq0J9wtteV53JI36VAaTRvWKchSOYzEQ2l2zXoYGdnZbN+7n5oxVVi5cTNtmjbWmvW/\nqLyupO4A9olIB+BNrGH0loh0AYYBj4vI18Ba4E4R+cPeb6OIXA7sALoCfYBsuyD0ZmBSMV/vhhJ6\nX+fGbY1BDsrr3bbk8XgYd/8ARk6ZyQMvTqVmrNasnw/lciUFtAIWA4jIHGNMODDJGPMo1oooo4j9\nfrR/34210roUq18PEdljjDli11qd8fXO15s4V06qLS8OJ+WNjSqkZj2y4DQurmljZo15FICX3/+U\nGrFas/5XldeV1AlOfu8PYVWsdwQGnWE/77p1D1aNuncPWhAFNetnej2fclJteXE4KW+Hi5uwcPlq\nADZt/eP0mvVnXyc5Nc2qWV+1gfbNL/JJzrKkvK6kfgauBD4yxlwHjAQG29tuwho2UFDLfqbjdAHm\nGGNqAzkicsgY82ev10JEnjsv76QIbqktd1velqYhTRvUpc+IF/DzeHjy7j58umSZVbN+WUt6duvI\n3WMm4vF4uPemq6ya9YQdjH/vY3YnJhPg78/C+NVMfPQ+IsK0Zr04ymWDsTEmCHgL60L2MWAMMBXY\niXVd6VX7sTrAP4AbsarUm4lIujFmArARmGXv1xBrsA0Tke+NMduBZvaxPgYWnfJ6d/zZhXNtMC5Z\n9RrZDcbzZ/s4ydnxa9Gl3DUYl8sh5QY6pEqWDin3cMx1EqWUKowOKaWUo+mQUko5mg4ppZSj6ZBS\nSjmaDimllKPpkFJKOZoOKaWUo5XXb4tR5V1WJgC533zq4yBnqUUXXycodbqSUko5mg4ppZSj6ZBS\nSjmaDimllKPpkFJKOZoOKaWUo+mQUko5mg4ppZSj6ZAqw9xSW57HbXkBPJ1vwq/3UPx6D4WqdU7e\ndnEna9ttD+K54mYfJXQ/1w8pY0w1Y8y0Qh6fYIwZcMpjofbPHz+r2vTzyRhznV0aWqLcUluex215\nAah1AZ7IGHLmvELOog/w63JLwbaginhaX0nO3NfImfsanqhqUL2ez6K6meuHlIjsE5F/+jqH07il\n
tjyP2/ICeOo0JneL/XPdD+6HisEQZNdbnTgOOScgqAJ4/CAwELKKqnNUZ+L4792zV0OdgSpAU2AE\nVnNwE6AvsB/4WERaG2P6AY8Du4AsYKMxpjLwCVCRgnJP7+PXAN7Gans5Adzt1ViMMSYAqyK9FlYN\n+2gR+Y8xphtWq8w+QIBEERltjHkW6AT4A5NEZLYxpjnwHnAQSDiPX54iuaW2PI/b8gIQHEbu/p0F\n9zPTITgMjmbDiePkLl+A312j4Pgxcn9fDYcSfZfVxdyykmqEVU0+DqsG/Sb7dp+8JxhjPMBzWPXn\nNwAX2Jv6YdWjd8KqTT/VGOAlEemKNXSePGV7FLDIrlK/FXjafnw8Vt3VVUBLO0MnoK5dxX4lMNIY\nU8k+5mj7NU6c49eg5LitMcipeU+qia2IJ647Oe+MJeetp/FUrwtVavgsmpu5ZUj9IiK5wF5gvYic\nwFpBhXs9Jxo4LCIHROQY8JP9eBNgmX17aSHHbg+MNsYsxRqAp/ZipwBtjDE/Ya2o8rbXFZE1dpb/\neh2rrX2shVhf3+rFyFCqnFRbXhyOzZuRhic4rOB+SDhk2BXsUVUhNRmyMyDnBLm7t+I55cK6Kh7H\nn+7Zjhdx23PKbe+Kc79CHi9sKB8FeonI3iJe+3as1VQn+/dfCnlO3l/tR4G3RWSc90Z7lXemDKXK\nu7a8WmwsS77/kQnPPePrWEVyat7c7b/j1/4acjcsg9hakJEKx45YG9MOQnRVCAiE48fwVK1NzrZN\nvg3sUm4ZUsWRDIQbYyKADKADsBzrelFrrOtShf0wnhXA34E3jDFXAtVE5AOv7VWAbSKSY4y5mYIK\n9n3GmAuB/wE9gCX2sSYYY8bbz3tRRB7wyrCwiAznnVtqy92aF4C928jdv9P6+EFuDjmLP8LTJI7c\no9mwZT25Py/Gr9cD1kpqzzbY7YB/kXShMjOk7CEyGvgO2I5Vgw7WBetPjTGLsS6cn3pBYzTwb2NM\nH3vbgFO2fwJ8YYxpC7wD7DLGjAJGAvOAbcBvwAkRWWaMWYI1HD3AFPsYY+3XeBDYSsGgKzHNmlzE\nzLemFrm9zaWtmPveOyUdo9jcljdP7o9fnvQfVG7SnoLbG5ZZqyz1l2jN+jkyxvQANovIdvtzWt+d\nsgL7S7RmvWTVq1UbgISRA3wb5Cz5Pzyx3NWsl5mVlA94sFZoh7E/BuHjPEqVSTqkzpGILMS6xqSU\nKkE+/5cmpZQ6Ex1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNP2clCqfKgYD4Olyg4+D\nqD+jKymllKPpkFJKOZoOKaWUo+mQUko5mg4ppZSj6ZBSSjmaDimllKPpkFJKOZoOqTJs85YEul1/\nE7PmfHjatmXxK+nZbwC39R/I5Olv+yDd6dySd9y7H9P7yQn0eXICGxJ2nLRt8S/r6DV8PH2feon3\nFyzNf3zzzj30+NdTJz2miqfcDCljzABjzISz3CfUGLO9kMcn2Me7whhz2o8NNsa8aoypX8QxK9s/\nH71EZWZlMWb8BNrFtSl0+9gXXuL1CeOZPeMtfoqPZ0uCb5tM3JJ35a//Y8e+ROaMeZSx9/Xj2Rkf\n5W/Lyclh7DsfMu2Jwcx8aihLVm9kX3IKmdlHePbfH9K2mfFJZrcrN0OqNInIQyKyrYjNrbAqsEpU\nUGAgb77+SqElmjt37SY8vDLVq1XFz8+Pzh06sHzlzyUd6Yzckjd+o9C1TQsAGtasRlpGJumZWQCk\nHM4gLCSYqMph+Pn50baZYfmG3wkKDGDaE4OJjQw/06FVEcrb9+7VN8b8F6gNvAKMApqJSLq9ytqI\nVVP1CVARqwILAGNMP+BxYBeQRUFlVqgxZhZwMfCRiDxjNxgPAQKxaq2O2L9uAyYDlY0xm0Vkekm9\n0YCAAAICCv/jTUxKJioyIv9+VFQkO3ftLqkoxeKWvEmH0mjaoH
ZBlrBQEg+lERpciajKoWRkZbN9\n7wFqxkSzctNm2jRpRIC/PwH+/j7JWxaUtyHVGGslUxlYB5wo5Dn9gI0iMtQYcxvQx24gfg6r4DMF\nWOX1/CbAhVir0m2Ad7XuncAUEZmZVzwKvIg1GEtsQJ01t9WaOShvrlfrnsfjYdzg/oycOovQ4ErU\njI32YbKyo7yd7v0oIsdEJBlIAwr7r6gJkNfouNT+PRo4LCIHROQY8JPX81eLSKaIpHNy7TvA58CT\nxpgxwAER+f18vZG/Ija2CklJyfn39ycmFnqa5RROyhsbGU7SobT8+wdSUk86jYtr0ohZTz/M1McH\nERZciRoxUb6IWaaUtyF16l/BiV63A+3fPUCOfduvkMe8Hwc4XtSLichioA3wO/CuMaZUKtb/TK0a\nNUjPyGDXnj0cP36cJd//SId2l/k6VpGclLdDi4tYuGItAJu2/UFsZDghlSrmb7933GSSUw+TmX2E\nJas20L7ZhT7JWZaUt9O9dsYYfyAKCAEOAdWNMVuBtsAaQLBO6z4B8oZKMhBujIkAMoAOWFXqZ2SM\nGQJ8JSLv26eMLYEkSuHrvvHX3xj/8mvs3rOXgIAAFn7zLVd27kStmjXofmUXRg9/nEeeGAnAtVd1\np37duiUdqUzkbWka0LR+bfo8OQE/Pw9PDryNT5cuJzS4Et3jLqFn1w7c/dzreDwe7v17DyIrh7Jp\n6x+MnzmP3YnJBAT4s3DFWiY+cg8RoSE+eQ9uU25q1o0xA4CrgArABcALQCXgEazBlAx8D3wGfIq1\ncvoR6C8i9Y0xA4EHge1YF84X2LeHiEhP+zWSRKSK14XzWsBYIBXrwvmdQAzwNfCSiBT5kQitWS9Z\n9S6wPg6w9atZPk5ydvxadit3NevlZki5jQ6pkqVDyj3K2zUppZTL6JBSSjmaDimllKPpkFJKOZoO\nKaWUo+mQUko5mg4ppZSj6ZBSSjlaefu2GFVScnPhSJavUxSf/WNhPLUa+TiI+jO6klJKOZoOKaWU\no+mQUko5mg4ppZSj6ZBSSjmaDimllKPpkFJKOZoOKaWUo+mHOcuwzVsSGDz0UQb07UO/3reetG1Z\n/EpenjQFfz8/Lu/YgfvvvcsnGZ97dRLrNm7C4/EwfOgDtGhyUf62b77/kTf+/R5BgUH8rfuV9Ot1\nMxmZmTz+9HOkHj7MsWPHuP+uAXRqG1eqmcdNfIO1m37H4/Ew4sFBNL+ooJl48Q/LeOPdDwgKDOTa\nblfQ75Yb+fg/8/l8weL852ySzaz++otSzexmOqS8GGOuBuqLyBtnud8lwE0i8lQR27djl5D+5ZDF\nVJza8renTKRqbAz97v4nV3XtwgUNG5RWPABWrl7Ljp27mPvWGyRs287wZ8cz9y3rS5+Tk8OYCa/y\n6btvERFemXuG/h/dOnfkm+9+pH7dOjwy+F72JyZxx5ChLJg7s/Qyr1nP9l17mDvtNRK2/8HwcS8x\nd9prBZlfmcS8t6dYmR8dQbdO7el53TX0vO6a/P0XfPtdqeUtC/R0z4uILDjbAWXvt7aoAeUrbqgt\nX/7LKrpd3hGAhvXrkXo4nfSMDABSDqVSOSyUqMgIq7K8dSuWrVxFZEQ4h1JTAUg7fJjI8NKtLo9f\ntYZundpbmevVIe3w4YLMqamEhRZkbndpS5b9suak/afMmMWgAX1LNbPb6UrKi90o0wyrifh2rMaY\nz4A3gOUicrExpgawE6gmIonGmHXA/wH3iEhPY8xErEosf+ANEZlhH36IMeZarK/5VSJyuCTfixtq\ny5OSD9L0woJTpaiIcBKTDxIaEkJUZAQZGVls/2MXNWtUY8WqNcS1asm9/W9n3lcL6N7zdtLSDjPt\n5edLNXNi8kGamoLv94uKiCAxOcXKHBFBRmYW23fupmb1qqxYvZa4lhfnP3fDb0K12BhiorUw9Gzo\nkDpdfeBSoKN9/yfgIyDN7t
3rgFV91dYYE4/Vo3cEwBgTBfxNRBoaYwKBAV7H3SgizxtjZgNdsYaf\nMzikMcg7hsfj4flRwxj+7POEhYZSq0Z1IJfP5y+iRtVY3n71RX7/3xaGP/sC82b4rrHeu23J4/Hw\n/IjHGDHuJUJDQ6hVvdpJ2z/6cj43XdPDFzFdTU/3TtcKaAQssX+FAfWAH4DLsIbUa0A7oD2Qf4FB\nRA4Cm40xnwO3Ae95HfdH+/fdQOmeo5zCKbXlsVWqkJR8MP/+gaQkYqKj8+/HtbqED6ZNYtpL1qCq\nWb0aq9dvoONl1oXyCxtdwIGkJE6cOFGKmaNJPClzMjFVClZGcS1b8P6Ul5n2whhCQ0OoWb1q/raV\na9bTsnmTUstaVuiQOl0OVuvwFfav5iLyPbAUq+W4EfAF0BRrYC3x3llErgGeBi4BvvTa5F3H7tPu\nNKfUlne4rA0Lv10KwKbfNxNbpQqhIcH52+9+6DGSD6aQmZXFkh+X0a7NpdStVYt1m34FYPfefYRU\nqoS/v3/pZY67lEVLf7Ayy/+IrRJNaHBB5nseGU5yipV56U/xtG/dCoD9SckEV6pIUGBgqWUtK/R0\n73TfAV2MMcFYTcWvAk8Ay4DHgDQRyTHG5GKtukZiragwxtQDbhCRicBqY8wqH+QH3FFb3qpFM5pe\naOh9z2A8Hj+eeuwh5v1nPmGhIXS/4nJuvfF6Bj74KB4P3Nu/L1EREdx20/UMf3Y8/Qb9i+PHTzD6\n8UdKN3PzpjQ1jel930P4eTyMengI8/67iLCQYLp37kivG67lrqHDrJr1f/QmMsJaNCcmJRPtdR1Q\nFZ82GHvxunC+FRgInAA+E5Fx9vZlwBf2taWxQJyI9DDGXIFVq3471ileHazrVB+LyGTvjyAYYyZg\nXZ+acaYsrmswzs1l+7rS/xfCc1Xv4tYAbFv1k4+TnB1PTN1y12CsQ8qLMeZeoIGIPOHrLDqkSpYO\nKffQa1I2Y0w74HHgG19nUUoV0GtSNhFZDjT0dQ6l1Ml0JaWUcjQdUkopR9MhpZRyNB1SSilH0yGl\nlHI0HVJKKUfTjyCo88PjgQqVfJ2i+I5kA5Czyl0fi/O/2jc/QdWXdCWllHI0HVJKKUfTIaWUcjQd\nUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTD3OWYW6oWffmlrzPz1vMuh178QDDbu5K87rV87d9\n8MNqvvzlV/w9HprWqcawm7syddFylst2AHJyc0lKy2D+yHt8E96FdCX1J4wxt5zl8wcYY246w/ae\nfz3VnytOzfrrE8Yze8Zb/BQfz5aEraURq0huyfvzlj/YkZjC7KH9GNPnGp6btzh/W3r2Ed75diUz\n/3U7sx7qS8K+ZNZt38N9Pdrx7gN9ePeBPtzStgU927XwSXa30iF1Bnb7S5+z2UdEZojIp0UcLwh4\n+DxE+1NuqFn35pa88Zt30LWF1WDcsFo0aZnZpGcfASDQ359Af38yjxzl+Ikcso8eIzy4Yv6+x0/k\nMOfHNfTt1Mon2d1KT/fObDIQZ4wZBXSxHwsE7hCRBGPMrVhD5ziwSkQeNMaMxmo1ngl8CFSwf90P\n3AU0N8ZMEZHBJRncDTXr3tySNyktgya1q+XfjwwNJiktg9CKFagQGMDgq9vTY8x0KgYGcE3LC6kX\nW1Ac+vX6zXS4sD4Vg7R772zoSurMXsTq4ZsPPCMiXYB3gMHGmFDgOaCbiHQEGhhjunjt2xXYJSJX\nAH2BWPt4UtID6qy5rTHISXm9sqRnH2H61/HMH3E3i0b9kw079vL77gP52+fFr+fmy5r7IqWr6ZAq\nnn3Av4wx3wNDgWigMfA/EUm3n7MUaOm1z3KgnTFmKnCBiCwoxbxn5JSa9eJyUt6Y8FCS0jLy7x9I\nSyemcggACfuSqR0dQWRoMEEB/rRqWItNO/cBkHnkKPsOHaZmdLhPcruZDqnieQZYKCKXY1Wo
A+Ry\ncl16EFZFOwAishe4GJgHDLJPGR3BKTXrxeWkvB0urM+itQLArzv3EVs5lJCKFQCoGRVOwv5kso8e\nA2DTH/uoGxMJgOxJpEFstE8yu51ekzqzHKyvURUgwRjjAW4E/IHNQCNjTJiIHAY6A2OBbgDGmG5A\noIjMN8b8CkwBZlBKX3M31Ky7MW/L+jVpWrsat78yCz+Ph5G9uvPpig2EVaxAt4sbM/DKOAZMmkOA\nnx+X1K9J64a1AUhMTScqLNgnmd1OG4zPwBgTA6wCkoFKwHbgdWA6cCcQCjyCNcx+FJFhXhfO/wPM\nwrqongM8BcQD64BNItLrTK/tugZjwC15AerVtyoWE94d7+MkZ8f/6rvKXYOxrqTOQEQSgTqFbKrp\ndXveKfuM9rrbsZB9m/z1ZEqVH3pNSinlaDqklFKOpkNKKeVoOqSUUo6mQ0op5Wg6pJRSjqZDSinl\naDqklFKOph/mVOVTkPVznvxadvmTJypf05WUUsrRdEgppRxNh5RSytF0SCmlHE2HlFLK0XRIKaUc\nTYeUUsrRdEgppRxNh1QZtnlLAt2uv4lZcz48bduy+JX07DeA2/oPZPL0t32Q7nRuyTvu9WncNmgo\nvQc9zIbf5KRti39YTs97/8Xt9z/CrE++yH/8y0XfcuOdg7n57gdYunxlaUd2tXI7pIwxVxhjPv6T\n5xSrMt0Yc7UxZtD5zvhXuKW2PI9b8q5cu57tu/Yw941XePbxhxg7cWr+tpycHMa8OoXpLzzDrNdf\nZMmyFew7kEhKahqTZnzA+5MnMPX5p1n843KfZHercjukiqO4lekiskBE3ijVcH/CLbXledySN37V\nWrp1agdAw3p1SDucTnqG1cOXkppGWGgoURER+Pn50e7SS1i2ai3Lf1lD+9aXEBocTGyVKMY89qBP\nsrtVufnePWNMIPAuUBfIxmoiDjXGzMLqx/tIRJ4xxiwFNtq7JVGMynRgJdBMRB41xrwMxAEVgaki\n8pYxZgawB7gUq9ihr4isLsn365ba8jxuyZt4MIWmjRsVZIkIJ/FgCqEhIURFhJORlcn2nbupWb0q\nK9asJ+4Sq0knK/sIg54YTVp6OkPu7Eu7S1sW9RLqFOVpJXUHsE9EOgBvApWxmlvuBdoBD3g9d6OI\nDPG6X6zKdGNMRWC7XbveCatUNE8FEbkKeA3of57f21/jtlozB+X1roTzeDw8P/wRRox/hSEjxlCr\nelVyc624h9LSeH3sk4wb9jDDx72CVskVX3kaUq2AnwBEZA7wG7BaRDLtqnTvPrNTr2wWqzJdRLKB\nKGPMMmA+EOO1+Qf7912AT7u2nVRbXhxOyhsbHU3iwZT8+weSDhITHZV/P+6SFrw/aQLTxj9NaEgI\nNavHEh0VQctmTQgI8KdOzRqEBFfi4KFUX8R3pfI0pE5w+vs9XsRzj3rfKW5lujGmM3Al0NledR0p\n4rV8WvDopNry4nBS3g5tWrHoux8B2CRbiK0SRWhwQTPxPY89SXLKITKzslm6bAXtL21JxzatWLF6\nHTk5OaSkppGZlU1keGWf5HejcnNNCvgZa4B8ZIy5DmhR3B3PojK9CrBTRI4ZY24A/O0L7KXOLbXl\nbsvbqnkTmja+gN6DHsbPz8Ooofczb/7XhIUE0/3yDvS67mruemQEHg/c2/dWIiOsRXOPzh257b6h\nAIx8cBB+fuVpffDXlJuadXtYvIV14fwY8G/gJhHJ+xhBkohUsS+cDxGRjcWtTAe+ApoBY4CvgSzg\nM6A9kAb4Ax+LyH/sAdlTRAacKa/WrJeseqYpANt++d7HSc6Op2qDclezXm6GlNvokCpZOqTcQ9ec\nSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH0yGllHI0/TCnUsrRdCWllHI0HVJK\nKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH+3/FKGgzACmL
wQAAAABJRU5ErkJggg==\n",
            "text/plain": [
              "<Figure size 216x1836 with 2 Axes>"
            ]
          },
          "metadata": {
            "tags": []
          }
        }
      ]
    },
    {
      "metadata": {
        "id": "DRL6XhixwueM",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}


================================================
FILE: experiments/tf_trainer/tf_hub_tfjs/notebook/EvaluatingClassifier.ipynb
================================================
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "EvaluatingClassifier.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "metadata": {
        "id": "DnVolqQO5UMn",
        "colab_type": "code",
        "outputId": "4e8cb139-8ed2-4b08-e282-57465b9aa39e",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 53
        }
      },
      "cell_type": "code",
      "source": [
        "!pip3 install --quiet \"tensorflow>=1.11\"\n",
        "!pip3 install --quiet sentencepiece"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\u001b[K    100% |████████████████████████████████| 3.2MB 10.3MB/s \n",
            "\u001b[K    100% |████████████████████████████████| 1.0MB 19.5MB/s \n",
            "\u001b[?25h"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "nworUNj67VL5",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "import os\n",
        "import pandas as pd\n",
        "import tensorflow as tf\n",
        "import matplotlib.pyplot as plt\n",
        "from sklearn import metrics\n",
        "import sentencepiece\n",
        "import zipfile\n",
        "from google.colab import auth\n",
        "from google.colab import files\n",
        "from IPython.display import HTML, display"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "koTqnJ5t7vR5",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "Use Kaggle's My Account page to download a kaggle.json file and re-upload it here."
      ]
    },
    {
      "metadata": {
        "id": "zuJpXuS07hrD",
        "colab_type": "code",
        "outputId": "b60430c3-dd36-44e4-f054-be4befe8998a",
        "colab": {
          "resources": {
            "http://localhost:8080/nbextensions/google.colab/files.js": {
              "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc
3luYyBnZW5lcmF0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhc
mVudEVsZW1lbnQuYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkY
XRhIHRvIGJlIHJlYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=",
              "ok": true,
              "headers": [
                [
                  "content-type",
                  "application/javascript"
                ]
              ],
              "status": 200,
              "status_text": ""
            }
          },
          "base_uri": "https://localhost:8080/",
          "height": 76
        }
      },
      "cell_type": "code",
      "source": [
        "!mkdir -p /root/.kaggle\n",
        "token_file = \"/root/.kaggle/kaggle.json\"\n",
        "uploaded = files.upload()\n",
        "with open(token_file, \"wb\") as f:\n",
        "  f.write(uploaded[\"kaggle.json\"])\n",
        "  os.chmod(token_file, 600)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "\n",
              "     <input type=\"file\" id=\"files-c219c0bd-6da2-4f3a-85d7-47b14b17d7d2\" name=\"files[]\" multiple disabled />\n",
              "     <output id=\"result-c219c0bd-6da2-4f3a-85d7-47b14b17d7d2\">\n",
              "      Upload widget is only available when the cell has been executed in the\n",
              "      current browser session. Please rerun this cell to enable.\n",
              "      </output>\n",
              "      <script src=\"/nbextensions/google.colab/files.js\"></script> "
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "Saving kaggle.json to kaggle.json\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "pF9BCpwc76_b",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "import kaggle"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "5IMnm-_f91DV",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "Download the test set and extract the labeled portion"
      ]
    },
    {
      "metadata": {
        "id": "exMy3FQp8xg8",
        "colab_type": "code",
        "outputId": "96b64e4c-c76d-4db8-b527-84bf334c66cd",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 71
        }
      },
      "cell_type": "code",
      "source": [
        "kaggle.api.competition_download_file('jigsaw-toxic-comment-classification-challenge', 'test.csv')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            " 21%|██▏       | 5.00M/23.4M [00:00<00:00, 29.0MB/s]"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "Downloading test.csv.zip to /content\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "100%|██████████| 23.4M/23.4M [00:00<00:00, 53.3MB/s]"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "id": "GFjhB3WO9RuC",
        "colab_type": "code",
        "outputId": "97272c2f-fb02-4d61-a184-74778d81097d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 71
        }
      },
      "cell_type": "code",
      "source": [
        "kaggle.api.competition_download_file('jigsaw-toxic-comment-classification-challenge', 'test_labels.csv')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "100%|██████████| 1.46M/1.46M [00:00<00:00, 119MB/s]"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "Downloading test_labels.csv.zip to /content\n",
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "id": "4Grw9zJt9Udw",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "test_labels = pd.read_csv('test_labels.csv.zip', index_col='id')\n",
        "testset = test_labels.loc[test_labels['toxic'] != -1].join(\n",
        "  pd.read_csv('test.csv.zip', index_col='id'))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "neCTJdjJ-hKn",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "Load the pre-trained toxicity model from Google Cloud Storage"
      ]
    },
    {
      "metadata": {
        "id": "DYVE2PB99XZx",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "auth.authenticate_user()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "D9gQqslA-RKJ",
        "colab_type": "code",
        "outputId": "069cf772-656f-4696-e961-ec1d67902b9c",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 89
        }
      },
      "cell_type": "code",
      "source": [
        "!mkdir -p tfjs_model\n",
        "!gcloud storage cp --recursive gs://conversationai-public/public_models/tfjs/v1/* tfjs_model"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Copying gs://conversationai-public/public_models/tfjs/v1/saved_model.pb...\n",
            "Copying gs://conversationai-public/public_models/tfjs/v1/assets/universal_encoder_8k_spm.model...\n",
            "Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.data-00000-of-00001...\n",
            "Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.index...\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "ZwYHVxgE_BIS",
        "colab_type": "code",
        "outputId": "9bc8b76a-2371-4291-b086-99d20ccf51de",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 289
        }
      },
      "cell_type": "code",
      "source": [
        "predict_fn = tf.contrib.predictor.from_saved_model(\n",
        "  'tfjs_model', signature_def_key='predict')\n"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\n",
            "WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
            "For more information, please see:\n",
            "  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
            "  * https://github.com/tensorflow/addons\n",
            "If you depend on functionality not listed there, please file an issue.\n",
            "\n",
            "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\n",
            "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "Use standard file APIs to check for files with this prefix.\n",
            "INFO:tensorflow:Restoring parameters from tfjs_model/variables/variables\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "CrVX18LN__4r",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "Load sentence piece model and preprocess test data"
      ]
    },
    {
      "metadata": {
        "id": "bMjJEb25_59p",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        ""
      ]
    },
    {
      "metadata": {
        "id": "5IYO0GF2_fEf",
        "colab_type": "code",
        "outputId": "7c81d410-f695-4bbc-daff-d4a10f23ace9",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        }
      },
      "cell_type": "code",
      "source": [
        "sp = sentencepiece.SentencePieceProcessor()\n",
        "sp.Load('tfjs_model/assets/universal_encoder_8k_spm.model')"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 36
        }
      ]
    },
    {
      "metadata": {
        "id": "x2votZMZAnnG",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "Score the sentences with toxicity model"
      ]
    },
    {
      "metadata": {
        "id": "uU3xQGiKA993",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "def progress(value, max=100):\n",
        "    return HTML(\"\"\"\n",
        "        <progress\n",
        "            value='{value}'\n",
        "            max='{max}',\n",
        "            style='width: 100%'\n",
        "        >\n",
        "            {value}\n",
        "        </progress>\n",
        "    \"\"\".format(value=value, max=max))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "Xs3Glf93Bp6O",
        "colab_type": "code",
        "outputId": "071890ba-b1b0-4e17-fd5a-6af93eaffb20",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "cell_type": "code",
      "source": [
        "tox_scores = []\n",
        "nrows = testset.shape[0]\n",
        "out = display(progress(0, nrows), display_id=True)\n",
        "for offset in range(0, nrows):\n",
        "  out.update(progress(offset, nrows))\n",
        "  values = sp.EncodeAsIds(testset['comment_text'][offset])\n",
        "  tox_scores.append(predict_fn({\n",
        "      'values': values,\n",
        "      'indices': [(0, i) for i in range(len(values))],\n",
        "      'dense_shape': [1, len(values)]})['toxicity/probabilities'][0,1])"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "\n",
              "        <progress\n",
              "            value='63977'\n",
              "            max='63978',\n",
              "            style='width: 100%'\n",
              "        >\n",
              "            63977\n",
              "        </progress>\n",
              "    "
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {
            "tags": []
          }
        }
      ]
    },
    {
      "metadata": {
        "id": "nXLm_GNWbuqP",
        "colab_type": "code",
        "outputId": "10537a68-d4be-4a10-9ab1-284489974cc7",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 71
        }
      },
      "cell_type": "code",
      "source": [
        "!gcloud storage cp gs://conversationai-public/public_models/tfjs/perspectiveapi.csv ."
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Copying gs://conversationai-public/public_models/tfjs/perspectiveapi.csv...\n",
            "/ [0 files][    0.0 B/  2.0 MiB]                                                \r/ [1 files][  2.0 MiB/  2.0 MiB]                                                \r\n",
            "Operation completed over 1 objects/2.0 MiB.                                      \n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "9-x3fQEjb2-X",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "perspective_api=pd.read_csv('perspectiveapi.csv')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "dGdFOCzbzDTJ",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "top_kernel = kaggle.api.kernels_output(kernel='tunguz/superblend', path='.')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "ydkfpaSV4GCK",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "top_kernel_scores = testset = test_labels.loc[test_labels['toxic'] != -1].join(\n",
        "  pd.read_csv('superblend.csv', index_col='id'), rsuffix='_predicted')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "nkeZUDG_31c4",
        "colab_type": "code",
        "outputId": "30e2889b-15b0-4aea-bf2b-86794434776b",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 376
        }
      },
      "cell_type": "code",
      "source": [
        "plt.figure()\n",
        "\n",
        "fpr, tpr, _ = metrics.roc_curve(testset['toxic'], tox_scores)\n",
        "plt.plot(fpr, tpr, label='Tensorflow JS model')\n",
        "\n",
        "fpr, tpr, _ = metrics.roc_curve(testset['toxic'],\n",
        "                                perspective_api['PerspectiveAPI'].values)\n",
        "plt.plot(fpr, tpr, label='Perspective API')\n",
        "\n",
        "\n",
        "fpr, tpr, _ = metrics.roc_curve(top_kernel_scores['toxic'],\n",
        "                                top_kernel_scores['toxic_predicted'])\n",
        "plt.plot(fpr, tpr, label='Top scoring Kaggle kernel')\n",
        "\n",
        "plt.xlabel('False positive rate')\n",
        "plt.ylabel('True positive rate')\n",
        "plt.legend(loc='lower right')\n",
        "plt.ylim(0.75, 1.0)\n",
        "plt.xlim(0.0, 0.25)\n",
        "plt.title('Performance on Kaggle Toxic Comments Challenge Test Set')\n",
        "plt.show()\n"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAf8AAAFnCAYAAACoxECQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xd4FNX6wPHv7qZX0oHQO4TeqyAE\nRIqoiAZFuKKCiCAoKqIgVUQRUAT7Va7KFVCwwAVFqUr7gVgoUlMJ6T2bZNv5/REZWEMIYDb1/TwP\nzzNzzpR3J8O+O2dmztEppRRCCCGEqDb05R2AEEIIIcqWJH8hhBCimpHkL4QQQlQzkvyFEEKIakaS\nvxBCCFHNSPIXQgghqhmn8g5A3LjmzZtTr149DAYDSim8vLyYMWMGPXr0uKHtZGdnM2bMGPLy8li3\nbh1+fn4Oirjy6d+/P6+++iqdO3cG4NChQzz99NN8/PHHNG7cuFT3FRcXx6BBgzhx4sR1r3P33Xdj\nNBqxWCzExsbSsGFDABo3bsyqVatuOIY1a9aQlZXFlClTrnudrKwsli9fzs8//4xOp8PJyYlRo0Yx\nbtw4dDrdDcdQ1o4ePYqnpyfNmjW7ofU2btzImjVrKCgowGQy0alTJ5599lmCgoLYsGED27Zt48MP\nP7zu7VksFsLCwvjpp5/YtWvXDa9fmv7880+mTZsGQE5ODrm5uYSEhAAwcuRIHn300Rve5pkzZ8jO\nzqZjx45F6lJSUli0aJF27js7OzN+/Hjuvvvua27TaDTy448/Mnz48BuORxSS5F9JffLJJ9SsWROA\nI0eOMGnSJLZt24a/v/91b+PUqVNkZGSwe/duR4VZJZw5c4ann36alStXlnriv1kbN24ELv9w2LZt\n2z/a3rhx425oeavVysMPP0zLli3ZvHkzLi4uXLx4kcmTJ5OVlcXUqVP/UTxl4YsvvqBnz543lPw/\n+eQTPv30U95++20aNWqEyWRi1apVjBkzhm+//daB0ZaNFi1aaOfSzfyQuZpt27bh4eFx1eQ/Z84c\nmjdvzrJly9DpdJw7d46IiAhatGhBq1atit3mH3/8webNmyX5/wOS/KuATp06Ua9ePY4ePcqAAQP4\n4YcfeOONNzAajdSvX5+lS5fi7+/PypUrSUxM5M8//6Rfv3588cUXpKamMnjwYNauXcuZM2d45ZVX\nyMvLw9vbmzlz5tCmTRs2btzIjh07yM7OJiwsjL59+7Js2TLatm3Ljh078PX15aWXXmLp0qWcP3+e\n++67T/vyX7VqFd988w1Wq5XGjRvz2muv4ePjw8qVK0lPT9fi8fPzY/Xq1QQHBxMbG8vMmTNJSkrC\nx8eH+fPnExYWRkJCAnPnziUyMhKAWbNm0bdv3yLHIz4+ntmzZxMXF4ezszOPPPIId955J3FxcURE\nRDBhwgQ2bNhARkYGzz//PEOGDCn22CYmJjJp0iQWLFhA+/bttfIff/yRFStWYDKZ8PT0ZNGiRbRs\n2RKbzcaiRYvYtm0b9erV49Zbb2Xv3r188sknxMXF8cQTT5CVlUXv3r1JTEzktttuo2vXrtp2lVKs\nWrWKb7/9FpPJxIABA3j++ecxGAw3dE7s37+fJUuWkJ+fj4+PD3PnzqVVq1Y8+uij9OnTh7Fjx5KZ\nmcnQoUP597//zZYtW0hPT2f+/PlER0czc+ZMUlJS8PX1ZcGCBbRs2dJu+zt37iQtLY2XXnpJi61W\nrVosW7aMnJwcoPCHyezZs4mPj8fZ2ZkJEyZwxx13EB0dzdixY7n//vu1HzFLlixh1apV2rm5YMGC\n614O4Pvvv2flypUYjUYaNmzI0qVLqVGjBsuXLyc3N5f4+HhOnTpFQEAAq1evZtu2bWzevJk9e/aQ\nkpLCoEGDeO6550hJScFkMjF8
+HCefPJJu89ssVhYtWoVy5cvp1GjRgC4uLgwbdo02rRpY7fs3Llz\n+fnnn3F1dWXFihU0adKEpKQkZs6cSXx8PGazmXHjxjF27Nhi/4aZmZnMnz+fY8eOYbFYmDJlCnfe\neafWUvDqq6/y0UcfkZKSwsSJE3nwwQex2WzMnz+f7du3U79+ffr27cv+/fv5+OOPi93ejfr000/5\n9NNPMZlMdO7cmYULF+Li4sK+fftYsmQJZrMZgKeeegqLxcJHH32Eq6srGRkZPP3003bbOn36NPfe\ne6/WUtS4cWO2bNlCYGAgAAcPHmTJkiVkZ2cTEBDAsmXLtG0bjUbGjRvHmjVrbvgzCECJSqdZs2bq\n4sWLdmUjRoxQe/bsUTExMapDhw7q1KlTSiml3nnnHTVlyhSllFJvvvmm6t27t0pNTVVKKXXgwAEV\nHh6ulFIqJydHdevWTR0+fFgppdS2bdvUoEGDlNVqVV9++aVq3769ioyM1NYLCwtTBw4cUDabTY0c\nOVLdfffdymg0qlOnTqlWrVqp/Px89ccff6gePXqo7OxsZbVa1b/+9S+1atUqLZYePXqouLg4ZbPZ\n1IQJE9Tq1auVUkqNGzdOffbZZ0oppbZv366GDBmilFJq7Nixavny5UoppaKiolTXrl1VWlpakeMz\nfvx49c477yillIqLi1OdOnVSsbGxKjY2VrVq1Up98sknSiml/ve//6mBAwde9RjfeuutateuXeqO\nO+5Q69evt6szm82qc+fO6ujRo0oppVauXKnGjRunlFJqx44dKjw8XOXk5Kj09HQ1ePBgNWbMGKWU\nUlOmTFGvvvqq9rlat26tvvzySxUbG6tatmyplFJq06ZNaujQoSorK0uZzWY1YcIELd6ruXLdS7Kz\ns1XXrl3Vr7/+qpRSasuWLWrw4MHKZrOp2NhYdcstt6i0tDQ1f/58tWzZMqWUUsuWLVOzZ89WSik1\nZswYtW7dOqWUUlu3blXDhw8vst+XX35ZzZkzp9i4lCr8O77//vtKKaViYmJUx44dVXx8vIqKilKt\nWrVS33zzjVJKqUmTJqn+/furtLQ0lZqaqsLCwlRcXNx1LxcZGak6dOigzp49q5RS6q233lLTpk3T\nPlevXr1UfHy8stlsavz48erdd99VSikVERGhNm/erJRSatGiRdr5l5ubq5588kmVnJxs93lOnjyp\n2rZte83PvH79etWhQwd14sQJpZRSs2fP1o7rnDlz1Lx585RSSkVGRqqwsDCVkJCgzGazatasmUpK\nSlLr169X48ePV0op9eyzz6rnn39eWa1WlZycrHr37q3Onj2rLX/pb/fLL7+odu3aKavVqrZv364G\nDRqkcnNzVVpamho0aJB2bha3vWt9lkuxXPLTTz+p3r17q5SUFGWz2dRzzz2nxTFs2DDtnDtz5ox6\n9tlnlVJKTZs2TX3wwQdX3cf8+fNVz5491XvvvadOnjypbDabVpeenq46deqkDh06pJRS6osvvlAR\nERFKKaXWrl2rJkyYcM2/hbg2eeCvCti9ezcpKSl07NiRPXv20LVrV60pMyIigh07dmC1WgFo167d\nVW8N/P7779SsWZNOnToBcNttt5Gens6FCxcAaNCgAQ0aNNCW9/HxoVu3buh0Opo2bUrXrl1xd3en\nadOmWK1W0tLSaN26Nbt27cLLywu9Xk+HDh2IjY3VttG5c2dCQ0PR6XS0bNmSixcvUlBQwMGDBxk2\nbBgAAwYMYP369RiNRg4ePMi//vUvAOrXr0+nTp2K3LIwm83s27eP+++/H4DQ0FC6devGgQMHgMKr\nt0v3E8PCwoiPjy/2uM6bN4+MjAxSU1Ptyp2cnNi3b5/WEtC5c2ftcx0+fJh+/frh6elJjRo1GDp0\nqLbe4cOHtc8VHh5OcHBwkX3u3LmTkSNH4u3trd1D//7774uN8WqOHj1KnTp1aNeuHQC33347SU
lJ\nXLx4kTp16jBu3DieeeYZ9u3bx+OPP263rtFo5P/+7/+0uAcNGsTnn39eZB+ZmZna1dnVFBQUcODA\nAUaPHg1A3bp16dKlCwcPHgQKbxvcdtttADRr1oy2bdvi5+eHv78/AQEBJCUlXfdye/bsoWfPntot\nmdGjR/PDDz+g/uq5vGvXrtSqVQudTkerVq24ePFikXgDAgLYs2cPR44c0a7W//75MjMzCQgIuNah\n1+K81FLSqlUrEhISgMLWgFmzZgGF/5/8/Py0/19Xs3PnTsaOHYterycwMJCBAweyfft2rX7EiBFA\n4Xmcl5dHRkYGhw8fpn///nh4eODn52fXqlXS9q7Hzp07GT58OAEBAeh0OiIiIrRt+Pv78+WXXxIZ\nGUmTJk1YsmRJidubNWsWkydPZteuXYwcOZI+ffrw3nvvoZTi0KFDNGjQgC5dugBw1113ceLEiSL/\nH8XNkWb/SurBBx/UHvgLDQ3l/fffx9PTk+zsbA4fPszgwYO1Zb28vMjIyADA19f3qttLS0vDx8fH\nrszb21v7j/b39Tw9PbVpvV6Ph4cHADqdDr1ej9VqJS8vj8WLF2tf+JmZmfTr189u+5cYDAasVisZ\nGRnYbDatTqfT4enpSWJiIkopIiIitHWMRiPdu3e3iysjIwOllN22fXx8SEtL0/ZzKVa9Xo/NZrvq\n8QB4+OGH6devH/fccw9t2rShV69eWt0nn3zCpk2bMJlMmEwmrdkyKytLe0AKsJvOysqyO45X1l2S\nnZ3Nhx9+yLp164DC5Hcjz3FA4d/yyv3odDrtb1m7dm3uueceli1bxmOPPYarq6vduhkZGej1eu3v\ne+Xf9kp+fn4kJiYWG0N6ejpOTk5254mPj492Pjk7O+Pi4gLY/00uzV/6sXo9y2VlZXHgwAG7c97T\n05PMzEyg8Py/5NK5+Xfjx48H4KWXXiIlJYUxY8bwxBNPFPnMSUlJ2Gw29Prir5uu/MxXfpZff/2V\n5cuXk5CQgF6vJy0t7ZrnX3Z2NlOmTNFuqxQUFNj9mLx0jl+KxWq1kpmZSf369bVlrjzHStre9cjK\nymLv3r3s2LEDKLxNdenzvfbaa6xevZoHH3wQLy8vnnnmGQYMGHDN7RkMBu6//37uv/9+cnNz2bFj\nBwsXLiQoKAir1cq5c+eK/F0l+ZcOSf6V1JUP/F0pODiYnj178uabb97Q9gICArQfCFD4n/rSlc75\n8+dvKsY1a9YQFRXFxo0b8fT0ZPny5ddMGFD4BavT6UhPT8ff3x+lFDExMdSuXRuDwcCXX35p9+V6\ntfX1ej2ZmZlaAszIyLiuK7a/a968OaGhoSxevJgZM2bw5ZdfUrt2bX755Rfef/99NmzYQJ06dfj5\n55+ZPXs2UJhojEajto3k5GRt2tPTs9i6S4KDg+nfvz9jxoy54XgvCQwMtPtb2mw2uyv1lStXMnLk\nSDZs2EBERITdFa6fn5+WUH18fLTjf2VCAejWrRsvvvgiBQUFdj8gIiMj2bNnD6NHj8ZisZCTk6Ml\n35v9O5QkJCSEPn36sHz58pvehrOzMxMnTmTixImcP3+eRx55hM6dO9v9uGzcuDG+vr7s3LmzSFJb\nuXIlDzzwwDX3MWPGDCZOnMioUaPQ6XT07NnzmssHBQXxzjvvFHnI1GKxFLvOtc6/4rZ3I4KDg7n3\n3nuZPn36Vevmzp3LSy+9xO7du5k+fbr2w/9qcnJyOHLkiPbcjqenJ8OHD+eXX37h9OnT9OjRgxYt\nWvDf//63yLpHjhy56c8gCkmzfxXTu3dvDh8+rDVD//777yxcuLDE9dq2bUtKSgpHjx4FYMuWLdSs\nWZM6dercdCypqak0atQIT09PLly4wO7du+2+mK7GxcWFXr16sWnTJgD27t3LhAkTcHZ2pm/fvloT\ndF5eHs8//3yRJlwnJyd69+6tXTnHxMRw+PDhEr9or6Vfv3
6MHDmSKVOmYDKZSEtLIyAggNq1a5OX\nl8emTZswGo0opWjTpg27du0iPz+frKwstm7dqm2nbdu22vzOnTu1pu0rDRgwgK+//pq8vDwAPv/8\nc+1YXK927doRHx/P77//DsA333xDvXr1qFmzJseOHWPPnj3MmjWLBx54oMi54e7uTo8ePbQH7Hbt\n2sWkSZOK7OOWW26hfv36zJw5k9zcXKDwQcvp06djs9m0v+Olv0NUVBRHjx694ddRr0efPn04dOgQ\ncXFxQOFtj8WLF5e4npOTE1lZWUBh8/P+/fsBqFevHoGBgUVeVzQYDEybNo0FCxZw/PhxoPA209Kl\nS9m5c6dda9PVpKWlERYWhk6n44svvqCgoOCa/x8GDBigne9ms5mFCxdy8uTJa+6jbdu27Ny5k4KC\nAjIyMvjuu+/+0fauFtO2bdu0H5dbt27l448/pqCggAcffJDU1FR0Oh2tW7fGYDBor4BeOs5XUkrx\nzDPPsHnzZq0sMTGR/fv306VLFzp16kRsbKz2GmBkZCQzZ84ECv922dnZ2q0dcePkyr+KCQ4OZsGC\nBUyePBmz2Yynp6d2n/FaPDw8WLFiBQsWLMBoNOLv76+9fnOzIiIimDp1KrfddhvNmzdn5syZTJky\nhY8//via6y1atIgZM2awdu1afH19Wbp0KYB2VbFhwwYA7rjjDmrVqlVk/Xnz5vHiiy+yceNGnJ2d\nWbhwIbVq1dKSw82YPn06v/76K/Pnz2f27NmsXbuW8PBwQkJCmDVrFr/99htTp05l2bJl7Nq1i8GD\nB1O/fn1uv/12Lak888wzPP3002zZsoVbbrmF9u3bFzm+4eHhnDlzhrvuugsoTESLFi26oVi9vLxY\nsWIFc+fOxWg0EhAQwOuvv47NZmPOnDnMnDkTV1dX/vWvfzF8+PAiz00sXryYp59+mk8//RRfX19e\ne+21IvvQ6/W8++67LFu2jBEjRuDs7Iy7uztjx47Vnqm4dKw2bNiAs7MzixcvJiQkhOjo6Bv6PCWp\nWbMm8+bNY9KkSVgsFry8vHjhhRdKXG/gwIEsWbKEmJgYRo8ezdy5c8nNzUUpRXh4ON26dSuyzqhR\no3B1deX555+noKAAnU5H9+7d+fjjj3F2dr7m/p588kkee+wx/Pz8GD16NKNGjWLWrFl89tlnV11+\n+vTpzJs3T3vmoW/fvjRr1uyaCe+2226zO/8GDx6sXSUXt70b0aFDB+0NDKUUgYGBLFy4EFdXV0aM\nGMGYMWPQ6XQYDAbmz5+Ps7Mz/fv3Z9asWVy4cEH7vwyFty0++ugjli1bprVUurq68vDDD9O/f38A\nli9fzosvvkheXh4uLi489dRTQOFzHG+88Qb9+vWTV5Vvkk7JTychSpVSSkvqn332Gfv27dM63rmy\nbuTIkUyaNInw8PByi1VUPVeeY2vWrOHIkSM3fBtQVH3S7C9EKTp58iQDBgwgMzMTi8XC999/r70V\nsGTJEubNmwfAuXPnOH/+PK1bty7PcEUV88cffxAeHk52djZms5kffviBDh06lHdYogJyaPI/ffo0\n4eHhfPrpp0Xq9u3bxz333MN9991n1x3pyy+/zH333UdERIR2z1KIyqJly5bceeed3H333QwZMoSQ\nkBDt4b2HHnqIqKgoBg4cyOOPP86cOXOu+tCmEDerTZs2DBs2jDvvvJMhQ4YQGhqqvW4pxJUc1uxv\nNBqZOHEiDRo0oHnz5kWeXh4yZAgffvih9uU4f/580tLS+PDDD3n33Xc5d+4cs2bN0h4YEkIIIUTp\ncNiVv4uLC++///5VOzKJjY3F19eXWrVqodfrtS4o9+/fr93/bNy4MZmZmVpXoUIIIYQoHQ5L/k5O\nTri5uV21Ljk52a7jEn9/f5KTk0lJSbEbWe5SuRBCCCFKT4V+1e967khc+WSrEEIIURFY8/LIOvkn\n6opeHI3RMSiLBa7IWS
ar6Zo9PV5N0wduvhOwS8ol+QcHB5OSkqLNJyYmEhwcjLOzs115UlISQUFB\n19yWTqcjOTnbYbEKCArylmPsYHKMy4YcZ8erqsfYajRiuhiPNdv+sxlP/Yn+r1ZuZTJhPHEMm8mM\nOTHBYbFU2uRfp04dcnJyiIuLo2bNmuzcuZOlS5eSnp7OypUriYiI4Pjx4wQHB9v1yy2EEEKUBktW\nFqqgAACb2UxG1Gnyz54h49RxrJ7umG1mTFYTBp2BwJj0m97Pz+08UX9rnE7yt+8QqrZnCJ7ORcfQ\nKE6vkhcpkcOS/7Fjx1iyZAkXLlzAycmJ7777jv79+1OnTh0GDhzI3LlztbGdhwwZQsOGDWnYsCFh\nYWFERESg0+l46aWXHBWeEEKIcpJtyiHPkndD6yQZU8jPzkRvvGK9fBOGlDTUlQMtKcXF3ERcDC4k\nG1NBB275VpodiMbJbKPARY+rqfhmdncAMq5al+FlINXXwIVgF1wMzrgaXNHbFHmezhg9CxO60kGm\nnxtWg44MWy4hHsG0CwrDy8WLQLfCZ9oUEOQegLuTGy4GF/S6su9yp0r08FcVm5gqkqrajFeRyDEu\nG3KcHS8oyJuExAyS8lKIyorl05PrcTG4cOnit8BqKpxQCr8sq1autymC0yxY9faXya4mG51PGvE2\n3th98WvJ8HfDNd9Ctq8rud6Fo0Y65eSR17AWuDjj3LUTjWs0wkXvTLBHIKBD5+SE7q8fGQadHoPe\nUGrx3KigoGuPI3E9KvQDf0IIISqG9PwMIrNi+D35BB7OV3+TK99SwMGEIxisiuZR+dxyJIdRNQrT\njKvBRVvOKyMf14KiQytfD3OIP+bQy6+QG3KMFDStZ/cQnU3Z8HT2wM3JDTeDK+5O7iizGZ/evXFy\n95SHxJHkL4QQ1ZLFZiHHnEuOKZdscw6n0s4SmRXN2YxIdOjsEqRN2XAx22gRmY+LWeGRbqFmqhmf\nXBsK7O5pdwX0V7Qn104xg94AuvzLhVc83e7b91YtcVtzcnBv1Ai9m7tdrMpixiOsDS4hIaV5CKo1\nSf5CCFEF2JQNmypMqhabhZNpZ0jNTyPXbCQ3L7sw0ZtzyDHlkmPORRmN+OQWLu+XZcHZoqidbaVJ\nno2G8SbyalxOwL4pxQ89rAMMDepr887OBnQ2AwWxsfgPvh2fXr1x9g9wzIcWN02SvxBCVGLRWbF8\nfOK/JBkLX5N2siiC0s0EZlhoezqPepk317zukmJE7/7XDwA3N2z5hVfuwQ/+C52TE24NGuAcGITe\n1dVuPXmuonKQ5C+EEJWEUgpbbi55sdGcy4ji7K5vcS9Q3HehgAxvA/5ZxSd61+bN0RuctAfsUGDN\nM+LRomXhrNWKe6PGoNfhVq8BToGBcm+8CpPkL4QQFYjJauJ8ZnRhD6e/n0R/6Fd0ZyLJ9XHFM6tA\nW84daHPFev5ZVgzePlizszD4+ODTszc6JwOebTvg3qhRmX8OUbFJ8hdCiHJiNBvZd/H/+DPtDO4W\nHamnjxGaUEC9BBMhaRa7ZT2zCsh21+OdZyPRz4nYWi4EeAXTsmYrat8yCIOvr1ypi+smyV8IIcqI\nxWbheOopsk3ZfHHsC7r9kUvtZDODUi3FrmNsVJvUfm0x1KtLm4CWuBhcaOTkRh+9fH2LmydnjxBC\nOFjOudN8t2EZzhZFi6gCnPXw+NX6rNHp8OnbD9eatfHu1g2Duwc6J/maFqVPziohhHCAzMQ4Ivd9\nj8eWPYD9/XknG5j8vHE12Qi47Xa8u/fAycdXEr0oM3KmCSHETVBKkX/+HNmH/w+sVtDrSY89i+7U\neW2ZK4dqyXXTE/zQeAKC6uJat57cnxc3rMBkJSUzT7r3FUKIsmBOS8WSlkbW/n2g15O588erLvf3\ndB5dxx2/gFAsw/rTtk57PG5g5DZRtR07n0p6dkGx9cmZeWTkmEhINXL2QqZd3bevj/jH
+5fkL4QQ\nVzAlJGDLz6PgwgWyD+zDePJEscta9RAX7MKpBq6k/NWHfYvQNtzabjghHkE0K6ugRYWXmpnP8ag0\nbErxv/3RpGTml7zS34T4e2Czlc4AR5L8hRDVmrJaKYiNIfXbr8n97ddil0vzMRBT04VT9d3A1ZkM\nbyfydRbcndy4v8U9dAhqI035VZDFaiMpPY+YxGxy8swUmK2cu5CFl4dzkWWPnEoiyNe9SHlMUs5V\nt+3j6cKofo2L3bdS0KCWN17uztTwci12uZshyV8IUa2YU5K5+N47OIeEkL1/31WXOVfHhUwvAyg4\nW8+NlBAP+tftQ9+aHRnhVsNuhDpRdVisNqxWhdlqY8naX7iQnHvD24hJysHd1X64XzcXA/kmK0E1\n3OjcIpjQQE9CA72oX/Of37u/WZL8hRBVnlKKvD9PcmT2f8i/mABA/vlzWr0l2I+Luhx+6OZDlqde\nG2WuTWBLHms6gkB3/3KJW1ybTSmSM/JIzcznQnIuFpsNHTqUUvx2NgU/n6JDDyekGUnNzMfT3f7K\nPTGt+MGLmoT64uXuTICvG83q1sBmU9QL8cKgL9rSE+jrjv4q5RWNJH8hRJVlNRqJX7mCvDOni9St\nHexHvquePFc9Ficd4AfAnO7PEOIRVMaRiuIopcjNtxR2dwxsPxxLbGIONgV/nE8tYe3MYmty8sz4\nel5uwfHxdMGYb8HP24VaAZ4kZ+QxaURr6gR7lcbHqHAk+Qshqgyb2Yzx5HHyIyMxnjxB/tkzdvUX\ngpz5vocPWV72zbIt/Zvh4eTOmJajcJEm/XJnsdpYt+MsPx6Ju67lm9bxJayBPwq0pnSlFLUDPXFx\nMhRZ3tVZj4db0Xv21YkkfyFEpWc8eYLET9dgTky8av1/b/Mjyd8JdDq8Xbxo6VWb2xuE09C3Hnqd\nvoyjrd6M+WaM+RaOR6WRkGbUkvOJ6DSS0vPQ63Rk5pqKrNepWVDhu5QKQoM86d+pDga9Ds9qnsRv\nliR/IUSlZDObyD93jrilS4rUBYy4i1wnGx/k7SW1xuWvuQ9GvEqBDDVfZo6eSWb9jrME1ih8Av54\nZFqJ6/h4uhDi505qVj7jBregZ+ua8haFA0jyF0JUCkopzEmJGE+cIHXzN1gzM+zqbc4GPr+rNslO\nBbgZjpBvzQfXwq+4iOZ30Se0Bz5u3iRnS/a/UcZ8C1/sOkt0YjYuTgb+nouz88xcSM7F3dWAXqfD\npsBktmK1Fd6nT0zPs1u+TpAngb7u+Hm70qVFsLa9Gt6uhPhJR0hlQZK/EKJCyzq4n4T33y22/kxd\nV4608iAxwBko7DEt35pPqFctEnKTeLXPXNycSvcd6aomLSufvAILyZn5/BmdzsGTiYTUcOd0XCYu\nTnpMluvrWCavwErD2j5Y/lrxzRexAAAgAElEQVTealPk5plZ+Gg3rXnfYNChlyv5cifJXwhRISml\nOPvcU6i0dLvyY43dcDYrDvetS0iN2rQJbMUtNitBHgF4O3tR26smTjLc7TUdO5/KWxv/IMTfg9hi\nOqDJzCm8726y2GhQ05uohGweGdaSsIYBeF+lgxsAvU5HUJA3ycnSulLRyf8QIUSFoWw20iJPEfPu\nW3im2Xew8sb9wdRw9WVej+dw0jsxvJxirKiUUhw5lUxOnpmT0el4uF3+ek9MM5KQZiQjx/5Butik\nnEvP0NGyvh81Azwwma20qOcn99qrOEn+QohyZzObOPfkEyhTYXLyvKJuRxcv2g4fy9OewdT1Cq32\nV/X7jydw+M8kPN2cyckzcyau8NmH3HzLdW+jXrAXOr2OZ0d3wN21eh/P6kr+6kKIcmG2momMPUH8\n2o+pfd6+af94Y3eatL+FFoPu4TFD9X6Vy2K1kZVrYvev8Xy7L6rY5XS6wmb3to0D6NIimBpervh6\nXe6zwN3VqdT7hxeVlyR/IUSZi/5lLwWrPwSg9hXl
H94ZwNgej3JXQIvyCawCiU7IZt7H/3fVun4d\nQhnUpS56vQ5ngx4/b0nq4sZI8hdClIlsUw7RZ3/FZdkHduX5IX5YO7emw10PU/SN/erldGwGB44n\nsOvXeLtyX08X/H3c6B4WwsDOdcspOlGVSPIXQjhMQm4ia058Tnx6HA9/lYKbSWl1+S46mixfiYdr\n1ew7/XoopTh4IpFth2Kw2RRxfxtFrkW9Gky+u430YidKnSR/IYRD5Cde5Pgbc7gryWxXbvL1xO3h\nB2nbqns5RVb2lFLs/i2e5Iw84pNzMVkK7+NfSLn6kLHPjO5A83o15H144TCS/IUQpSol+jQpry9H\nb8zjygZqJ39/aj02GfdGjcstNkdSSnEsMo2jZ1LsxnM3mWz8+Mu1B6jp3iqEe/o1xtPNGVeXogPR\nCFHaJPkLIW6aOSUZa04u6T9+T0bKBQxnogGwGypn6iM0bdOrSr8zbjJbeez13SUu1699bbqH1cTD\nzYmgv8Z9d3aSgYVE2ZPkL4S4IeaUZOJWvI45IcGu/Mrr1QwvA3nj7yasaQ8C3f3LNsAypJRi++E4\nPv/x8tDBLev7cWefhnZN9gaDjnoh3tKMLyoMSf5CiOtiNRqJXfIypgv2Tdhn67lhsNg438KfOs06\nMLTTqCo/TG5aVj7JGXksWXvUrvy5+zvQvJ5fOUUlxPWT5C+EuCpls5G5exdpWzdjSbMfitXk6cq2\nLh5E1nYmwM2fQfX78UitzjhX4d739h9PYO9v8fwZk1Gkrn/HUB4Y2KxK39oQVUvV/Z8qhLhptoIC\nzk6eWKTc6OfB/qYGjjVxp6ZnCOPq30qn4HYY9FX3ITWrzcYn351iz28X7cr9fVxpVrcGAzrWoXGo\nbzlFJ8TNkeQvhEApRd7pU6Rs/IKC2Bitj30A1bE1W1sbOGMpTH71vEN5tMEA2ga2qrLN+xarDZPZ\nxqE/E/nPtlNaeYCPGy+M7STd5IpKT5K/ENWYzWwm6ZM1ZO376ar13/TzI7J2EljA08mDh8Lup4V/\n0yrXvF1gtmKx2th3LIH1O85itakiy4zs24ihPRqUfXBCOIAkfyGqqZzffiV+5Qq7MreGjQh56GFM\n/j7M3LdAK6/rHcpTHR/HpQoNsmOx2ti09zxbD8QUu0yDmt60auDPXbc0xKCvmq0conqS5C9ENZMf\neZ6YRfPtygJH3YffoMGk5afz1P5X7Ooea/sv2gS2KssQHabAbOWVz37BarNx7kKWXV1ooCfBfu7k\nFVh48p520tmOqNIk+QtRTSiLhTOTHgV1uUnbyc+fhotfJT4/mbcPLSc+9/K7+01rNGJIw3Ca+TUp\nj3BLTUxiNrt/i8dqVez5Lb5I/ZDu9bmnX9XsdVCI4kjyF6IasJlNnJ00wa6s8Yq3MHh5YbFZePnQ\ncru6V3rPwdulcg+4Y8w38+n3pzlwIrFI3WMjwmjXOFCu7kW1JclfiCrKmpND7rHfydyzm7zTl59Y\nDx4zlhr9+mvz03a9oE3P6f4MIR5BZRqnI7z86RHOxmXalU0d2ZYGdWvg42qQnvZEtSfJX4gqJv2H\n7aRs3GD3ut4ltadOw6tte21+8/nvUBTeBohofnelT/xWm41HX91lV3Zrx1DG/NUBT1CQN8nJ2eUT\nnBAViCR/IaoIpRTnpk/BlpOjlend3fHt0xevTp1xb3z53n1CbhILDi7V5ut6h9IntHIPsbvoP4c5\nF3/5Ib5GtX14cWzncoxIiIpLkr8QlZiy2Qo75bFYiF28UCv3aBlG6LSn0BkK72n/nnycP05+gQ0b\nBy4eLrKd5zpPLbOYS1NegYW9v1+0G1gHYMrINnRoWrlbMYRwJEn+QlRSxXXBW/PRx/Dp1p34nARO\np59jw5mvr7q+t4sXc7rNwMPZw9Ghlrp9xy7yweaTRcofur0FfdrVLoeIhKhcJPkLUUmdf+5pbbpG\n/3B0Tk743tof
l6BgLDYLiw4ts1u+lmcIdzQaTE3PYALc/Ctlf/xKKZav/41jkZcHGqoV4EHHZkHc\n0ashzk7SEY8Q10OSvxCVjLJYiFm8ULu3X2/2XNzqN9Dqt0fv4qtz/9PmH249hpoewdT2qlnWoZYa\nm1Ks+/Es2w/H2pW/90w/nAyS8IW4UZL8hahkzj01FZvRCIB7s+Z2iT+zINsu8c/sMo263pW7Gfw/\n2/5k16/2nfMM7VGfkX2lYx4hbpYkfyEqkcT/fKQl/loTJuHdtZtW994f/+G35GPa/Kr+r5Z5fKVF\nKcWmvefZvC/arrxry2Am3hFW5QYWEqKsOTT5v/zyy/z222/odDpmzZpF27ZttboffviBt99+GxcX\nF4YOHcqYMWM4ePAgTz75JE2bNgWgWbNmzJ4925EhClFpZB08QOae3QC4t2ipJf60/HQWHHwdk/Xy\ne/0Le84qlxhLg9Vm46m3fibbaNbKOjUPYvJdbcoxKiGqFocl/0OHDhEdHc26des4d+4cs2bNYt26\ndQDYbDYWLFjApk2bqFGjBo8++ijh4eEAdO3alTfffNNRYQlR6RTEXyB921Zt2F3Ptu0InTodm7Kx\nI3Yvm85u0ZZtH9SGR9s8WF6h/mM//3GRD7dcfoq/brAXcx/qIlf6QpQyhyX//fv3awm9cePGZGZm\nkpOTg5eXF+np6fj4+ODv7w9A9+7d2bdvH6GhoY4KR4hKKeHfH2hJ/5LaTzyJ1WZl6q7n7cqf7DCR\nZn6V9z74lBV7yM23aPP/ur0FfdrWksQvhAM4LPmnpKQQFhamzfv7+5OcnIyXlxf+/v7k5uYSFRVF\naGgoBw8epGvXroSGhnL27Fkee+wxMjMzeeKJJ+jVq5ejQhSiwkr99mtSv95kV1ZrwiS8unRle/Qu\nvj6/VSvvVrMTQxsOIsDdr6zDLDXPvr1PS/xdWwYz4Y4w6X9fCAcqswf+1BXDiOp0Ol555RVmzZqF\nt7c3derUAaBBgwY88cQT3H777cTGxjJ27Fi+//57XFxcrrntoCBvh8Yu5BiXhUvHOG7jV3aJ38Xf\nny4fvQ/Ac9+/TGT65dfdZvSaSNc67amsElJzefTlH7T5/p3rMn10R4fuU85lx5NjXPE5LPkHBweT\nkpKizSclJREUdLm7za5du7J27VoAXn/9dUJDQwkJCWHIkCEA1KtXj8DAQBITE6lbt+419yUDdTiW\nDIbieEFB3iRdTMd48gQX1nyilTf74GMA/oyJYc7+xVp5fZ+6PNt5ClA5z3+rzcbqTcc4eubyd8SE\n4a3oHlbToZ9HzmXHk2PseKXx48phyb9Xr16sXLmSiIgIjh8/TnBwMF5el8cHf+SRR1iyZAnu7u7s\n3LmThx56iG+++Ybk5GQefvhhkpOTSU1NJSQkxFEhClFh5JyP5Mz0GXZlTd//CJuy8cWZb9kd97NW\n3r9uH0Y2HV7WIZaaF94/wMVUo13Ziim98fG8dgufEKL0OCz5d+zYkbCwMCIiItDpdLz00kts3LgR\nb29vBg4cyL333sv48ePR6XRMmDABf39/+vfvz4wZM/jxxx8xm83MnTu3xCZ/ISozZbOR+vUm0rZ8\nq5X53T4U/9uHkpafzpz9r9gtv6zvQlwNle//hMVqIy0rn5nvHrArv6NXA+7s06icohKi+tKpK2/G\nV1LSxORY0oznGMpq5czEh+3Kmqx+D72LS5Gn+R9seS/da1Wu4WltSnE2LpNXPvulSF3XlsE8NqJ1\nmcck57LjyTF2vArd7C+EuLaEf3+gTTea8DCGzr3Q6fWk5afzwbFPtbpXes/B28Xrapuo0J5Yvod8\nk1Wb93J3xt/HlVH9mhDW0L8cIxNCSPIXoowpi4WYVxZREBUJFA7BW2voQJKTszmS+Cv/PbWRPEs+\n7QLDGNNyVKUacvdUTDofbjlJSma+Vta+SSB39mlIvRB5AlyIikKSvxBl7MqBeV
xq1sKnW3fyzPn8\n58Q6DiYcwcXgwgMtRtGjVudK1cHNa/89ysnodG1eBwzsUpeIAU3LLyghxFVJ8heijCiLhfh3V2uJ\nv/aUaXi1a09kZgyfHFpHYk4y9bzr8FDYaII9gkrYWsWRm2/mP9tOaYlfr9OxYmpvvNydyzkyIURx\nJPkLUUbOPPaINu1SqzYebduyLepHtkRuRynFwHr9GNZoEE76yvHfMifPzNQ39tqVDexcl9HhcqUv\nREVXOb5lhKjECuJiiZ57eXTK4AfH4d6rF8uOvE1kVjQ1XH2Z2uMhQvS1yzHKG7P3t3g+2vqnNh/i\n507zen6S+IWoJCT5C+FApuQku8Rf67HJeHfuwpSdM7EpGwadgee7TqNhiGN7tvunjPlm4lONLF//\nK24uTqRnF2h1r03qSYCvWzlGJ4S4UZL8hXAQZbEQ9fyz2nzTdz5A5+TEU7tfxKZsADzVaRJezp7l\nFWKxLFYbv5xOZtvBGKIS7H+U5BUUvr7n7mrgrWm3VKqHEoUQhST5C+EAymq1u8cfOn0Gm2N+ZFvU\nj1rZsIaDaOBTrzzCK1ZmronpK38qtr5Xm5oM7FxXXtsTopKT5C9EKUv8z8dk7tmlzQeOvJfvXKP4\nMWqPVjas4SBubxhe9sFdQ+TFLBasOWxXdku72tzWtS61Aipe64QQ4uZJ8heiFOX+8btd4ncZfQ9r\n/KI5GxOplb1+y3zcnCrWPfKv9p7nm5+jtPkVU3vj41H5xhAQQlwfSf5ClKILbywDwDk4hLjJd/L5\nqU2Qcbn+rVuXVKh75CkZeTz7zn67sndn9MXZyVBOEQkhyoIkfyFKgVKK6Jde1OYDZj7LJyc+0ubH\ntryPbrU6lUdoxTp0MpF3vj6uzTerW4Pn7u9QoX6cCCEcQ5K/EKXgzKMPadNeEaNYceIjEo1JdK/V\nmTEtRlWohGpTilnvHSApPU8re3VSDwJ93csxKiFEWZLkL8Q/dOGtN7Rpt1F3sdr9N9KNGQyoewt3\nNRlaYRJ/vsnCxt3n+eFInFbm4qxn5ZN9pJlfiGpGkr8Q/0D0wnna6Hz6tmGs8viVnIJcRjS6nYH1\n+1WYxK+U4vFle+zKxg5uTr/2oeUUkRCiPEnyF+ImxSxeqCV+GtRhdbtMTGYzo5vfTe/Q7uUb3N+8\n+83le/tjBjWjX4dQ9BXkh4kQouxJ8hfiBimrlTMTH9bmbb7evN3LCjYY3/oBOga3LcfoitrzWzyH\nTiYBENG/Cf071inniIQQ5U2SvxA36PzT07TpvM6teL9ZKs56AxPbjKOFf8UY2MZitfHhlpMcPJFo\nVz6wS91yikgIUZFI8hfiBpyeMB5shf3y/9jFm2NNU/B08uDx9uMrTFe9aVn5zFi9z67Mz9uVpY/3\nrDDPIAghypckfyGuk/H0KS3xR9d05ljTwlfjpneaRC3PkPIMTROXnMOcDw9p83ff0ohhPRuUX0BC\niApJkr8Q1ylty7cAWHw9+ap/YV/3E9uMqxCJPy0rnx9/iWPrgRit7PXJvfDzdi3HqIQQFZUkfyGu\ng81swnj8GAD/HlB4xd8puB1tg8LKM6yrNvEDrH7qFtxc5L+3EOLq5NtBiBLYCgo4O3miNp/npifY\nI5BxrSLKMSr4/lAMn+84q827OOt5dFgr2jQKwMVZOu0RQhRPkr8Q15Cy8QvS/rdZm//3HQEAvNT9\n2XKJx2K18e8tJznwt6f4lz7eE3+fijVSoBCi4pLkL0QxCmJj7RL/5j4+ZHsZWNTrhXKJ50xcBos/\n/cWu7NaOoTwQ3gy9Xp7iF0JcP0n+QvyNNSeH5C/WkfXTXq3sjfuDAXj9lgW4OZX9Q3QrNvzG7+dS\ntfl7b23C4G4V49VCIUTlI8lfiCsom41z056wK1s9KhCAWV2nl3nitylFZHyWlvj9vF1ZPKG73NMX\nQvwjkvyF+IvNZOLs4xO0+VqTJjMzYz381T
FOqFetMotFKcXUN/aSm2+xK399cq8yi0EIUXVJ8hfi\nL4kf/1ubrj15KntrpEFmYeKf0WlymcVhsdqY8Nouu7IOTQN5YGCzMotBCFG1SfIXAsiPPE/2oQMA\nWMbcxXPpn0N6YV2bwJY09K1fJnHEJeUw59+Xe+h7YGAzBnSSgXiEEKVLkr+o9qx5ecQsmq/Nr7L9\nrE23D2rNI60fLJM4dh29wH++O6XNPzO6Ay3r+5XJvoUQ1Yskf1GtWfPyODdlkja/8r4gbfqNfi/j\npC+7/yJXJv6V0/rg6eZcZvsWQlQv+vIOQIjyknv8mF3i57Gx2AyF9/hnd3u6zBK/1Wbjidd2aPP/\nntlfEr8QwqHkyl9UO8pmI3rubEzxF7Sy0BnP8Wz8RwDU965LzTIarMemFI++ukubv7NPwzLZrxCi\nepPkL6oVqzGXc1MvP7nvWq8+IVOn8vQvS7Sypzs9XmbxTLziqf5xg5vTt31ome1bCFF9SfIX1Yay\n2ewSf/ADD/JDaC47r0j8/2o1GoPe8R3o/P11vmcf7EyLUB+H71cIIUDu+YtqIvvw/3FmwnhtvtHS\nFeyqW8DOuJ+0sqc7PU6Xmh0cHotSyi7x39a1Ln3kil8IUYbkyl9UeZk/7bHvwGfqdJS3J9t/2QVA\nLc8QXuj6FDpd2QyOs2lvpDa94JFuhAZ6lsl+hRDiEkn+okpTSmmJ3+DlTYPFr2Jwd2f1b5d/DLzY\n7ekyjeno6WQA7uzdUBK/EKJcSPIXVVrWvssd9jRa9gY6vZ4sUzbHU/8E4Il2j5RpPAdOJHAhJReA\n8M51y3TfQghxidzzF1WWKTGRxI8+AKBG/3B0ej0mq5nnf1qgLdPCv2mZxXM8Mo33vjkBgLeHMx5u\n8ttbCFE+JPmLKsmWn0fUC89p84Gj7iWzIJvpu1/Qyl7rM6/M7vNn5pp4fd2v2vwbU/uUyX6FEOJq\nSkz+Fy5cYOrUqTz4YGH/5uvXrycqKsrRcQnxj6R+87U23fiNVeicnJn18+Ur/jndZuDh7F4msRw4\nnsD0lZffKvjguVvLZL9CCFGcEpP/7NmzGTFiBEopABo2bMjs2bMdHpgQN8uWn0/699sAqPX4FAye\nnjy95/I5+3KvFwnxDC6TWD79/hTvfXtCm1/4SDf0ZdTaIIQQxSkx+ZvNZgYMGKA1j3bp0sXhQQlx\ns2wFBZx94jFt3qtde6KzYimwmgCY1PYhfF3LpjOdL3adY8cvhV0Ih/h7sPTxntSWp/uFEBXAdT1x\nlJWVpSX/M2fOUFBQ4NCghLhZVyb+Os/MRGcw8OrhlQDodXpaB7YskzjWbj/ND0fitPnFE7qXyX6F\nEOJ6lJj8J0+ezL333ktycjLDhw8nPT2d1157rSxiE+KGpG37H/x1eyp0+gw8mrcgNS9dq3+tz1yH\nx6CU4u2vjnH4VOG7/HWCPJn/cDeH71cIIW5Eicm/VatWfPXVV5w+fRoXFxcaNmxIUlJSWcQmxHXL\nj44i5Yv1AHh16IRnWGtyzUbm7F8MgA4dbk5uDo3BZlM88upObb5ZHV9mjunk0H0KIcTNuOY9f5vN\nxuTJk3F1daV169Y0a9YMnU7H44+X3ahnQpREKUXMgrkAGLy9qT15CgBbI38oLNMZeO2WuQ6P49X/\nHtWmB3Wpy3MPdHT4PoUQ4mYUe+W/efNmVq5cSXR0NC1btkSn06GUQq/X07t377KMUYhiWY1Gzk29\n/GO00dIVACTmJrH7wj4C3QN4sdvTOOsd26HOV3vPczo2A4Axg5rRv2Mdh+5PCCH+iWK/EYcNG8aw\nYcNYuXIlU6ZMsavLzs6+ro2//PLL/Pbbb+h0OmbNmkXbtm21uh9++IG3334bFxcXhg4dypgxY0pc\nR4i/u/juam261sTH0RkKh+PdeHYLNmXjriZDHZ74N+05z7f7orR5SfxCiIquxG/FKVOmcPbsWdLT\nCx+cMp
lMLFy4kK1bt15zvUOHDhEdHc26des4d+4cs2bNYt26dUDh7YQFCxawadMmatSowaOPPkp4\neDgxMTHFriPE3xlP/Ynx+DGg8Ml+j+YtAPgz7QzHUk/StEYj2gWGOTSGX04n2yX+f8/s79D9CSFE\naSgx+S9atIiffvqJlJQU6tWrR2xsLOPHjy9pNfbv3094eDgAjRs3JjMzk5ycHLy8vEhPT8fHxwd/\nf38Aunfvzr59+4iNjS12HSGulB8VSdxrr2jzlxK/1WblyzPfokPHyKbDHdp975FTSazadEybl8Qv\nhKgsSkz+v//+O1u3buXBBx/kk08+4dixY2zfvr3EDaekpBAWdvmqy9/fn+TkZLy8vPD39yc3N5eo\nqChCQ0M5ePAgXbt2veY61xIU5F1iPOKfqUjH2FpQwIGF87T5nps2oNMXPru6/exe4nMTuLVhTzo2\nauGwGP6MTrNL/BuXDMfZ6Z8NlVGRjnFVJsfZ8eQYV3wlJn8XFxegsKc/pRStW7dmyZIlN7yjS90D\nA+h0Ol555RVmzZqFt7c3depc/R7pletcS3Ly9T2DIG5OUJB3hTrGKZu+1KabrHqXlNTCIXLzLHn8\n9/evcTG4MLB2f4fG/Mybe7Xp957pR0Z67j/aXkU7xlWVHGfHk2PseKXx46rE5N+wYUM+++wzOnfu\nzEMPPUTDhg2v64G/4OBgUlJStPmkpCSCgoK0+a5du7J27VoAXn/9dUJDQykoKLjmOkIoi4W0Ld8C\nEDT6AfSurlrdd1E7yTHnMrzRYId24btlf5Q2veyJXjgZZHBMIUTlUuK31rx58xg6dChPPfUUI0eO\npH79+rzzzjslbrhXr1589913ABw/fpzg4GC75vtHHnmE1NRUjEYjO3fupEePHiWuI6o3c0oyZx57\nRJuv0ffy6HgpeansjN2Ln2sN+td13HC5Wbkmvtx9HoDwznWo4eVawhpCCFHxlHjl//LLL/PCC4Vj\noA8fPvy6N9yxY0fCwsKIiIhAp9Px0ksvsXHjRry9vRk4cCD33nsv48ePR6fTMWHCBPz9/fH39y+y\njhAAlox0Imc+o83XnjwFndPl03fT2f9hUVbubDIEF4OzQ2LIzDXZDc17V59GDtmPEEI4WonJ32Aw\nsH//fjp27Iiz8+UvVb2+5KbOGTNm2M23aHH5AaxBgwYxaNCgEtcRAuD8jOnadINFS3AJCdHmz6Sf\n49fkP2joU59Owe0csn+rzWaX+J+6tx3uro7tP0AIIRylxG+vDRs2sGbNGm1eKYVOp+PkyZMODUyI\nS9J/uPx2SeM3V2HwuDwsrk3Z+PLsZgDuaea4V/ve//aENr1iam98PFwcsh8hhCgLJSb/I0eOlEUc\nQhQr+fPPAPDt288u8QMcTPiF2OwLdAnpSAOfeg7Zv1KKQycLB7MaHd5UEr8QotKTdktRoZkSErTp\n4AfG2tXlWwr45txWnPXOjGg82CH7zzKamPbm5eb+gZ3rOmQ/QghRliT5iwrLZjYR9eJMAAy+NbSO\nfC7ZHrOLLFM2tzcIx8+tRqnuOzPXxGv/PUp8yuX39x8a4rhOg4QQoixJ8hcV1tlJE7Tpei/av/mR\nlp/OjzG78XXxYWD9fqW6X5PZavdwH8DSx3vi7+NWqvsRQojyUmLyz8zM5J133iE5OZmlS5eyY8cO\n2rdvr/XLL4QjGE/9qU3XefZ5nP387Oq/PrcVs83CiMa342oo3Xvwb391udveGRHtadVAznUhRNVS\n4vt6L774IrVq1SIuLg4oHNXvueeec3hgonq7NGiPk58/Hs2a29VFZkZzOPFX6nnXoUvNDqW73+Qc\nfjuXCsBT97WTxC+EqJJKTP5paWmMHTtWe8d/8ODB5OfnOzwwUX0l/udjbbrh4lft6pRSfHGmsHvf\nkU2Ho9eVXte6FquNOR8e0uZbNwwotW0LIURFcl33/M1ms/b+dEpKCkaj
0aFBierLnJ5O5p5dANQI\nH2jXix/A4cRficqKoUNwW5rUaFhq+z0Vk86StUe1+eVP9Cq1bQshREVTYvJ/4IEHuOeee0hOTuax\nxx7jjz/+0Lr7FaI02UwmIp+53JNf0L2j7epNVhNfn9uKk96JOxsPKdV9v3dFJz6PDGuJr/TZL4So\nwkpM/rfffjsdO3bk6NGjuLi4MH/+fIKDg8siNlHNZO7aoU03Wrq8yKt9P8bsJb0gg0H1byXQvfTu\nxf96NoX07AIA3p3RD2cnGaVPCFG1lZj8+/bty7Bhw7jjjjvs+uYXorRl/rQHKOzMx6mG/dP9GQWZ\nfB+9A28XLwbVv/Vqq98Um1K8+cXvAPh6ukjiF0JUCyV+061fv56goCBmz57NiBEj+PDDD0lMTCyL\n2EQ1kvnTXkzx8QB4d+tepP7bc99hspkZ3ug23J1K7337t778Q5teOrlnqW1XCCEqshKTf82aNXno\noYfYsGEDq1atIi4ujvDw8LKITVQjaZu/0aYNHh52dTFZcRxMOEKoVy161OpSavs05pv59WwKAE/c\n3QbDdYxUKYQQVcF1Pe1/+vRpvvvuO77//ntq1KjBnDlzHB2XqEbyY6IxpyQD0OStd+zrLAUsOfwm\nAHc0Glyqr/bN/ej/tMH5tdwAACAASURBVOmOzYJKbbtCCFHRlZj8Bw8ejLu7O8OGDeODDz4g5Ipx\n1IUoDcnrP9em9W72Tfpbo37Qppv7NSm1fWblmkjJLOyvYkZE+1LbrhBCVAYlJv+33nqLJk1K70tX\niL/L+/MkAA0WvmJX/nvycX6I2Q3A/c1H4mxwLrV9/ue7U9q09OInhKhuik3+06ZNY8WKFTz88MNa\nBz9Q2MOaTqdj165dZRGfqOIy9+7Wpl1q1rSre/ePNdp015odS22fZouNX04X3maYcnebUtuuEEJU\nFsUm/xdffBGAtWvXFqnLy8tzXESi2rBkZJC45iMAfHr3savLNV/uRXJFv5dx1pfeAJT7jycAoAM6\nyL1+IUQ1VOzTU4GBgQDMmTOH0NBQu38ysI8oDVn792nTwREP2NU9u3cuAEHuAaWa+G1K8fHWwhED\nR90qt7OEENVTsd+q33zzDatWrSI+Pp5+/fpp5WazWfthIMTNsublkfLlegAC777H7kG/zee/16Yf\nb/dwqe73wF9X/QADOtUp1W0LIURlUWzyv+OOOxg6dCgvvPACU6ZM0cr1er107yv+sXNTJmnTvn0v\n99hnsVm0J/xvbxBOsEfp/dA8eyGTDzYXPlzYp20t6c1PCFFtFZv8T5w4QatWrRgxYgQxMTF2dVFR\nUfTo0cPhwYmqyZRwUZuuO/MFDJ6e2vziQyu06SENS6czKZtNcSIqjWXrf9PKxt0uXVULIaqvYpP/\nV199RatWrVi9enWROp1OJ8lf3LSYlxcC4Fq3Lu5NmtrVJRiTAHih61Ol1qHP7A8PcjH18gOEK6f1\nQX/FGyxCCPH/7d13XFXlH8Dxz73AZQgOBBwMJw5AcFtqbi1nmSO1TIufK3GlJqFJ7jTLzLTUzJbm\nKCs1t6U5cCOCSioqKooCTkDG5d7fHzcPkgMu3IuM7/v16tVzx/Oc5zwc/HLOec73KW6eGPyDgoIA\n+OGHH7K8r9PpUEsaVJFL2rt30SUnAVDhnRFZPvvqxHKlXNE+62N/ubV802kl8Deo6czbnWpja226\nCYRCCFEYZRvF161bx4oVK8jIyKBv3760bdv2sY//CZETDy/bq3HOnDtyX5tCeLzhfryvk7dJtqXX\n69lzwnCLoU7VsgzvXkcCvxBCkIPgv3r1anr16sX27dvx9PRk586dbN68OT/6JooYvVZLwvrfgEfP\n+hccX6qUh/gOMMn2xn+Z+SjhmN5+JmlTCCGKgmyDv7W1NRqNht27d9OxY0e55C9yLfFE5oQ7+7r1\nlHK6Tkv03csABDYaZZJt/bbnPDfv
pgIwUCb3CSFEFjmK5FOmTOHYsWM0btyY0NBQ0tLSzN0vUQTd\n/8eQXMex68uoHvojcvSuIKXs7uCa5+3o9XrW77sIGB7pa+FXMc9tCiFEUZJt8J87dy6VKlXiq6++\nwsLCgpiYGKZMmZIffRNFTFLYcQBsKlVW3jtzK0opT2z8rkm2c+9+ulJ+q1Ntk7QphBBFSbazn1xc\nXPDx8WHXrl3s3r0bPz8/atWSy6jCOGk3bpAeb1hMx66WISCnZaQxP3QxAOXsnE02w//LXyMMbTra\nmaQ9IYQoarI9858/fz5z5szhxo0bXL9+nenTp7N48eL86JsoInQp97kY9J7yWm1jg16vJ2jfDOW9\n9xuPMcm2Eu6k8M/l2wD0bFnNJG0KIURRk+2Z/8GDB1m1apUy0U+r1fLGG28wZMgQs3dOFA3nRg5X\nytUWfEmKNpWxf3+gvDe2wXCTLN6j0+uzzPCvX0PWoBBCiMfJ9sz/v0l9LC0tUUl2NJFD96POgU4H\ngMfkKVjY2rLmzG/K512rvkTVUpVMsq2vfj+plL8Y3UKOUyGEeIJsT7d8fHwYOnQoTZs2BWD//v3U\nqVPH7B0TRUP8r78AoLYrgY2HIcgfjD0KwDDft/BxMt2EvCORhtTA/+tSGzsbSeYjhBBPku2/kEFB\nQWzevJmwsDBUKhXdunWjY8eO+dE3UcjpdTruRxqy9rmPDwTgRnK88rlX2Zom29bDS/U29algsnaF\nEKIoyjb4q9VqPD09UalUqFQqatasKZdTRY6cH5/56J61uzsAUw7MUd4z1cI9J6LiWbLhlEnaEkKI\n4iDb4D979mx27txJnTp10Ol0fPLJJ3Tp0oXRo0fnR/9EIZUaE0PGHcOs+4rDRwKG/P0PzG4ebJLt\npGsz+GztCeX11++1Nkm7QghRlOVotv8ff/yBlZUVAGlpafTp00eCv3iquFUrlLJ9vfoAnL55BoCS\nGgfsNSVMsp0hc3cr5SXjW6FWy1UpIYTITrbXXZ2cnLC0zPwbwcrKClfXvKdgFUVb8mnDZfgqcz4B\nDCl3d13eB0CP6l1Mso2oq3eU8vi+9bC0kHUnhBAiJ7I98y9Tpgw9evTgueeeQ6/Xc/jwYdzd3Zk/\nfz4Ao0aZZiEWUXSkXbuqlC3LOAJw7EYYUXcu4OvkTcPy9Z5U1Sgzvjc8NVDKXkPtSmVM0qYQQhQH\n2QZ/d3d33P+drAXQqlUrc/ZHFAFxP68BQPVvTojUjDTWnfsDS5UFr5rorP9i7F2lHDywkUnaFEKI\n4iLb4B8QEJAf/RBFhF6nUxbwcRtrSOm7PfovbqfeoUOl1jjblTXJdg6eug6Ad+UylLa3NkmbQghR\nXMhNUmEyer2es4PfVl7betYg4f5NdlzaTSlNSV6s1MYk27l5N4Wthy4D0KqezD8RQghjSfAXJnPv\n0EGl7B5kyN2/7twfpOu0vFK9EzaWpjlDH7coM39/vRrOJmlTCCGKkxwF/1u3bhEeHg4Ycv0L8V96\nnY7YpV8BULr9i9hWrcY/N89xPC6cqqUq0aicaSb5XbiWea//04BmqCXhlBBCGC3b4L9x40Zee+01\n3n//fQCmTZvG2rVrzd4xUbhc+SQzc59T9x5k6DL4+ex6VKjo5fmyybJCTvvuCACOJa3lXr8QQuRS\ntsF/+fLl/P7775QpY3iUasKECaxZs8bsHROFx/UV33P/n0gAyg30R63RsOfqAa4mxfJ8hUZ4lHQz\nyXau3EhUyqN6+pmkTSGEKI6yDf4ODg7Y2toqr21sbJRsf0LEr/uZO3/9CYCmQkVKNX+BQ7HHWHvm\nd2wtbehW7SWTbEen0zP5m0PKa3cXe5O0K4QQxVGOkvz8+uuvpKamcvLkSTZt2oSjo2N+9E0UcCkX\nznNz00YASjZ7gfJv+QPw3alVAHTwaI2DxjRBeuuhS0r581EvmKRNIYQorrI9858yZQrh4eEkJSUx\n
adIkUlNTmT59en70TRRwl2ZMVcrlBhoe8TudcEZ5r0Nl0y2ys37/RQD6d6iBva1ceRJCiLzI9sy/\nZMmSTJ48OVeNz5w5k7CwMFQqFUFBQfj6+iqfrVixgvXr16NWq/Hx8WHixImsW7eO+fPn4+HhAUDT\npk0ZNmxYrrYtzEuv1ytlz6++Vib07bz8NwDl7VxMtq0N+y+SmpYBQLM6FUzWrhBCFFfZBv+WLVs+\ndqb2rl27nlrv0KFDREdHs3r1aqKioggKCmL16tUAJCYmsmzZMrZt24alpSVvv/02x48bssJ16tSJ\nCRMm5GJXRH7Sp6YqZdW/Cz8lpiUpK/f5+7xhku0cjrzBr3+fB6C0vQaNlYVJ2hVCiOIs2+C/cuVK\npZyenk5ISAipD/3D/yQhISG0a9cOgGrVqnHnzh0SExOxt7fHysoKKysrkpOTsbOz4/79+5QqVSoP\nuyHyW0r0RQDsfDKv5uy4lLm8bkX78ibZzvdbIpXypwHNTdKmEEIUd9kG//8u31u5cmX8/f0ZOHDg\nU+vFx8fj7e2tvHZ0dCQuLg57e3usra0ZPnw47dq1w9rams6dO1OlShVCQ0M5dOgQ/v7+aLVaJkyY\ngJeXV7Y74ezskO13RN78d4yPzzLkeihVyU35bPufuwCY3Gq0SX4ml6/fIylFC8CGT17Oc3sFnRzH\n+UPG2fxkjAu+bIN/SEhIltexsbFcunTpCd9+sofvEScmJrJ48WK2bNmCvb09AwYMIDIyEj8/Pxwd\nHWnVqhWhoaFMmDCBDRs2ZNt2XNw9o/sjcs7Z2eGRMU6+cgUATeOmxMXdI0OXoXzmoqpgkp/Jewv2\nKOWi/jN+3BgL05NxNj8ZY/MzxR9X2Qb/RYsWKWWVSoW9vT1TpkzJtmEXFxfi4+OV1zdu3MDZ2ZCH\nPSoqCnd3d+WRwYYNGxIREUHPnj2pVq0aAPXq1ePmzZtkZGRgYSH3eQuSlEvRyj1/TYWKAGy+uBMA\nFSqTZPNLSdOSeD8dgI+GPp/n9oQQQmTKNvgHBgZmuXyfU82aNWPBggX06dOHkydP4uLigr294Zlv\nV1dXoqKiSElJwcbGhoiICFq2bMnSpUupUKECXbp04cyZMzg6OkrgL4ASjxlS7Fo6OaFSq0lMT2Lz\nxR0A1HfxfVrVHPtk1XGl7FLa9infFEIIYaxsg//s2bP5/vvvjW64fv36eHt706dPH1QqFcHBwaxb\ntw4HBwfat2+Pv78/b775JhYWFtSrV4+GDRvi5ubG+PHjWbVqFVqtlhkzZuRqp4T56FJSuLnRcCvG\n6eXuAEzYk3klqGeNbnnextX4JKKuGhbwCXy9fp7bE0IIkZVK//DN+McIDAwkJiYGPz+/LGl9R40a\nZfbO5ZTcXzKvh+/hXf3yCxKPGs78q3/xFZ+dXE7UnQsABD83Hhe7vC+x+/ZHfyrlbwLb5Lm9wkDu\nk+YPGWfzkzE2v3y55+/m5oabm2kWZhGFW2rMFSXwVxg8DK2VWgn8vWq8bJLAv+PIZaW8cEyLPLcn\nhBDiUU8M/uvXr6dbt24EBATkZ39EAZWRnER08CTDCwsLHBo3YdrBT5TPW7k1M8l2Vu44a2ivbkVs\nrbP921QIIUQuPDG3/88//5yf/RAFXMy8zEBfff5C7qbdIzbpOgD/8+lvkm2cjr6llF/vUMMkbQoh\nhHhUtgv7CAGGFfwAKn04DbWNDe/vnaZ8Vs+ljkm2sX6v4RaCm3MJLNRyaAohhLk88bpqaGgorVq1\neuR9vV6PSqXKNre/KDqSLlxUytZu7iw/mZnyObDRaJNsQ6fX88/l2wCM7uVnkjaFEEI83hODv5eX\nF59++ml+9kUUUMdHjwVA4+pGekY6R64bnsFv6dYUd4eKJtnG5z+fUMqOJW1M0qYQQojHe2Lw12g0\nj+T1F8XPrR3blLL7+EBG7p6ovO5d4xWTbOPhR/tea1PdJG0KIY
R4sifeWPX1NU2mNlG4xa0yXOIv\n4VeXE8kXlPc/fM40yy7/uO0fpVypvAMvNvYwSbtCCCGe7InBf/z48fnZD1EA3fpzh1J2HTGafVcP\nAlClZCWc7crmuf0bt+/z57EYABrUcCZ4YKM8tymEECJ7MqVaPNHt7YZL/mWbNQXg9M0zALzp1dsk\n7Qd+lbli5PBXTfPEgBBCiOxJ8BePpc/IID3uBgDVA4Zx4U608pkpMvn9HXZVKQe90SDP7QkhhMg5\nCf7ise7u26uULe3s2HLRMCmvrI2jSdo/dNqQIMinqiPV3UqZpE0hhBA5I8FfPNa9f5ftdeppuMQf\nkXAagLe8++W57QydjlMXDdn8/DvVznN7QgghjCPBXzxWckQ4ACWbNWf7uT3K+x4OeX/8c9GvEUq5\nlL11ntsTQghhHAn+4hH6jAylbOlQkqVHDY/7udpXwEJtkbe29XpCz8YD0LetZ57aEkIIkTsS/MUj\nzg7xV8on4k4q5fdNkMp374lrSrl9I/c8tyeEEMJ4EvxFFnqdTim7vz+JxeHfAVDHyQuVSpXn9pdv\njgTgpSaSzEcIIZ4VCf4ii9ToiwBYOjlhXbWq8v6QOgPy3PaVG4lKuWeranluTwghRO5I8BdZ3Nlr\nmNxX8rmm7Lt6CABrS2uTnPVP/sbQXnXXUqhN0J4QQojckeAvFOk3E7iz+y8ANOXKs/H8VgA612hj\n0u34d5HH+4QQ4lmS4C8UN1b+qJRLPt+UxPQkAHp6dcpz23eT0wCw0VhQroxdntsTQgiRexL8hSL5\npOH5e/cJQRy5fhwAGwsbLC2euPJzjv317wI+TqVs8tyWEEKIvJHgLwDQa7Xo09MBsKpaleUnDc/2\ne5etaZL2f99rWA64qU8Fk7QnhBAi9yT4CwDuHshcYW9XzD6lPNC7b57b/mDZQaXcobE82y+EEM+a\nBH+BXqfj+rfLACj7cnd+PfcHAK9W74JalbdDZOuhS8TEGeYO9GhZVWb5CyFEASDBXxC7bIlS3uiR\n+Sx+S7emeWo34kICq/88B4BLaVs6P185T+0JIYQwDQn+gnsHDwDg9GpPDiWEAdDTsxuW6txP9EtK\nSefT1WHK64+GPp+3TgohhDAZCf7FnF6vV8qhnpkz8Vu7N89TuyM+y1wJ8Mt3W+apLSGEEKYlwb+Y\nS7t6VSnHam8D0NajRZ7a/OfSLaU89e3GWGvythKgEEII05LgX8zFrTI80mdXvwGhceGU0pTklWq5\nT+qTodMxe2UoALU8SuPmYm+SfgohhDAdCf7FXPJpw5K9N5v7cF97n4bl6uZ6hr9Or2fQnF3K6+4t\nqj75y0IIIZ4ZCf7FmF6rVcqH1IYMfA3L1811e3vCMm8hDH3ZG0+30rnvnBBCCLOR4F+MXZ4zCwB1\niRJExJ+inJ0z7vauuW7vuy3/ANC2gRuNa5czSR+FEEKYngT/Ykqv15NyPgqAu52ak67T0rBc3Vwv\n3fv3Q2f93V+oYpI+CiGEMA8J/sVUYugxpby/nCEDX8Ny9XLVVlp6Bt9ujgTAu3IZ7Gys8t5BIYQQ\nZiPBv5iKXfoVADaNG/HPrXNUKumOi51Trtr6KzRGKY/u7WeS/gkhhDAfCf7F1IMV/C608UKn19Eo\nl2f9gJLCt0vTylio5ZASQoiCTv6lLoYykgyX+VGpOHz7JCpU1HfxzVVbV+Iy1wJo18DNFN0TQghh\nZhL8i6G7Bw3L91pW9uDC3UvULFOdUtYljW5Hr9czedkhADSWakqW0Ji0n0IIIcxDgn8xlHLOcJk+\n1tMZgIblc3fJ/8zl20p5/sgX8t4xIYQQ+UKCfzF075BhFb+9TolYqi2p6+ydq3Z+3H4GgNqVykj+\nfiGEKEQk+BczutRUpRzNTXzK1sbW0tbodm7cSiYmzjB34I0ONUzWPyGEEOYnwb+YiVuzKsvrRuWM\nT+er0+sJXHxAeV2hbIk890
sIIUT+keBfzNzZ/RcAfzU0rLbnXbaW0W18+M1hpfzFaLnXL4QQhY0E\n/2JEr9cr5Yjqhkv9VhbGZePT6/XK43192npKNj8hhCiEJPgXI+k3ritlnVqFl2NNo9tYv++iUu7Q\nyN0U3RJCCJHPJPgXI0kR4QD8U8kagBZuzxtVX5uh4/e9FwDDyn1CCCEKJwn+xUjSiTAAUjRq6rv4\nUsfJy6j6mw9EK+W+bT1N2jchhBD5R4J/MaFLSSH5ZAQAR73sGOjV1+g2th66DMCQbt6o1blb+lcI\nIcSzJ8G/mEgKP6GU75WwwEJtXFKem3dTSE7VAlCnalmT9k0IIUT+sjRn4zNnziQsLAyVSkVQUBC+\nvpmLx6xYsYL169ejVqvx8fFh4sSJpKenExgYyNWrV7GwsGDWrFm4u8ukMlOI/WYpAMdq2WJtYXwO\n/r0nrillOxuzHjZCCCHMzGxn/ocOHSI6OprVq1czY8YMZsyYoXyWmJjIsmXLWLFiBT/99BNRUVEc\nP36cjRs3UrJkSX766SeGDh3KJ598Yq7uFSvXf/xeWcL3sFcJOlRqY3Qbfx67AsA7r/iYtG9CCCHy\nn9mCf0hICO3atQOgWrVq3Llzh8REw/PhVlZWWFlZkZycjFar5f79+5QqVYqQkBDat28PQNOmTTl2\n7Ji5ulds6PV67uz6E4AYZytSbNS0dTcuMc+Fq3e4m2z448GvulzyF0KIws5swT8+Pp4yZcoorx0d\nHYmLiwPA2tqa4cOH065dO1q3bo2fnx9VqlQhPj4eR0dHQ8fUalQqFWlpaebqYrGQGp05Q//n9oaf\nh7GJfX7bHaWUrSxlAR8hhCjs8u3m7cPZ5RITE1m8eDFbtmzB3t6eAQMGEBkZ+dQ6T+Ps7GCyfhY1\nMfsMz+VfcTEE/C412ho9Xn8eMczyX/J+O5ydJI+/uchxnD9knM1PxrjgM1vwd3FxIT4+Xnl948YN\nnJ0N68dHRUXh7u6unOU3bNiQiIgIXFxciIuLo1atWqSnp6PX69Fosp+cFhd3zzw7UQQkRJ4FIKKa\nIZ3v887P5Xq8LHQZMtZm4uzsIGObD2SczU/G2PxM8ceV2S77N2vWjK1btwJw8uRJXFxcsLc3LCbj\n6upKVFQUKSkpAERERFC5cmWaNWvGli1bAPjrr79o0qSJubpXbNwL2Q/ADUdLmlZoTGnrUkbV3/Dv\nlYOSJTSoVPJsvxBCFAVmO/OvX78+3t7e9OnTB5VKRXBwMOvWrcPBwYH27dvj7+/Pm2++iYWFBfXq\n1aNhw4ZkZGSwf/9++vbti0aj4aOPPjJX94qFtOuxSvlWSQtauDU1uo2NIYY5A5LOVwghig6z3vMf\nN25clte1amUuH9unTx/69OmT5fMHz/YL07i9czsAsWUtsVRb4u5Q0aj691O1pGt1AHRtWtnU3RNC\nCPGMSIa/Iiw58jQA+/3saVrR+FsokdG3ANBYyQx/IYQoSiT4F2FpV68CcNXJCu+yxi/fe+qiIfh3\nblbFpP0SQgjxbEnwL6Iefkwyw1KFj1Nto9vY+W9Wvybe5U3WLyGEEM+eBP8i6u7+fQBcrKChc5X2\neWrLq4qjKbokhBCigJDgX0RdX/41AKkaFR0rtzO6/tINpwCw0VjII35CCFHESPAvghKPhyrl0y95\n5yp4h5w0PCbYqq6ryfolhBCiYJC1WYugmzGGxDxhnra8Ubu30fWTU9KVcu821U3WLyGE6S1YMI9/\n/jnNzZsJpKSkULGiKyVLlmLmzI/Nvu309HSGDfOnatVqODk54+LiwqBBb+W53aNHD7N8+VIuXYpm\n/fqt3Lt3j48+msrt27fJyMjA0bEsEycGU6KEvQn2ItPff+9i//49BAZ+8NjPlyxZhIuLC6+80tOk\n230WJPgXQSm/rgfgnntZKtobP1kv4LM9AJQqkX1qZSHEszVixBgANm3awPnzUQQEjM63bd+4
cR29\nXk9QUDBLliwyWbsNGjTCz68er77aGYBVq37E17cur732OgDffLOE7du38sorPUy2zeJGgn8RVqfF\ny0bX2XLwklIe0s3blN0RQuSzRYs+5+TJcHS6DHr27Evbtu2ZOvUDypUrzz//nObGjesEB0+nSpVq\nfPjhRG7duklaWhqDBg2jcePn+OmnH9m1aycALVu2oV+//kyd+gE2Njbcu3eP9PQ0Ll++xEcfTcPR\nMXO57wUL5nHqVARarZZevfpSsmRJQkL2MmbMe2zevJE1a1ayfPlKrl+PZdasqXz22dP/cEhMvIda\nnXmX+u23Bz/ynQ0bfiMi4gS3bt3kwoULDBnyDtu2beHSpYt8+OEMatXyeuz+nD37DzNmTKFUqVJU\nrJh5m3Pt2lX8+ec2VCo1rVq1oXfvfnn6WRQ0EvyLmIgdP/PgfL2RWyOj66/96xwAVSo4UKtSmWy+\nLYR42Jo/z3E48oZJ22xUyyVXt9+OHTvCrVs3WbhwKampKfj7v8kLL7QEICMjg08//YJfflnN1q2b\nadOmHcnJSSxcuJS7d+9w+PBBrly5zPbtW1iy5FsA/P3707p1WwBKly7De+9N5MqVy0yd+gGBgR8o\nZ/4hISFcuXKJL79cRnJyMgMG9GX58hUsX74UgPDwMEqWLE1ycjLh4WHUr98w233p0eM13n03gH37\n9tCkyfO0bduB6tU9H/leTMwVFixYzG+//cKKFd+xbNmPrF//Kzt2bMPe3uGx+7N8+dcMHvwOTZs2\nZ/bs6QBcuXKZvXv/5ssvv0Gn0zFkyFu0bm38xOmCTCb8FTGxh/cCkOBm3AI+AIdOX+dBdoDA1+ub\nsFdCiPwWHh5GeHgYAQGDGTt2JDpdBjdvJgDg51cPAGfnciQlJVKlSjXu3LnDtGmTOX48lNat2/HP\nP5HUqeOLpaUllpaW1KnjS1SUYZVQL68nXxWMiIigbt0GANjZ2eHhUYnY2Guo1WpSU1OJj4+jWbPm\nnD59kvDwMOrVa5Dtvnh4VGLVql8ZMmQ4aWmpjBw5lM2bNz7yvVq1vFCpVJQt60T16jVQq9U4OpYl\nKSnxiftz8eJ5fHx8AZS+nDoVweXL0QQEDGbkyKGkpNzn2rWrRox+wSdn/kWMR9RtAGr0Nn7SzU87\nDb/YZUtaY2UpKX2FMFbvNtULzCRZKysrunXrTr9+bz7ymYVF5u+3Xq/H1taWpUu/Izw8jD/+WE9I\nyD4aNWqSJVlYeno6KpXhfNHS0uqJ21WpVFnqabXpqNUq6tTx5ciRQ9jbO+DlVYejRw9x7txZRox4\nN0v9W7duUaZMGfR6vdLP1NQUrK1taNLkeZo0eZ7nn2/Ojz9+S8eOXZ64X//dx//268H+6PV65ZaC\nTqdT9q9ZsxaMHTshS/sHDux/4n4XNnLmX4Tcv3RRKZf1qmtU3bT0DO4kpgEw7X+ylLIQhZ2Xlw/7\n9u1Bp9ORkpLCZ5/NfeJ3IyNPsXPnNvz86jF+fBDnz5+jZs1aREScQKvVotVqiYw8hadnjWy3W6dO\nHUJDjwCQlJTItWtXcXV1o27dBqxZ8xNeXt7UqFGT8PAwSpQogaVl5jloYmIi/v5vkJKSwsWL5/Hw\nqAzAiBFDOXbsiPK9uLgbWe7P58ST9sfDoxKRkYa8Jg+2UatWbY4ePURqago6nY7PPvuYtLQ0o7ZX\n0MmZfxFybdPvAPxT2Ybsf0Wz+n3vBaVso5HDQojCrm7d+vj4+DJkyFuAnh49XnvidytWdGXx4oX8\n9tsvqFQqXn99AK6ubnTs2JURI4ag1+t55ZWeuLiUy3a7TZo0YcuWHQwfPgitVsvw4aOxtrbB17cu\n778/lmHDAtBor6B0ygAAHQlJREFUNCQmJtKsWYssde3t7XnjjYG8844/VlYaxowZD8CkSR/y6adz\nWLZsMWq1mlKlSjF2bKBR4/Gk/Rk48H989NF0XFxcKF++
IunpaVSs6Er37r0YPnwwKpWKVq3aoNEU\nraefVPqHr4MUUnFx9551FwqEM/8bCMD+HnUY2HGsUXWnfXeYC9fuMbx7HRrUdM7ymbOzg4yxmckY\n5w8ZZ/OTMTY/Z2eHPLchl/2LiOsrflDKTRp1eco3H5WSpuXCNcMvq09VyeMvhBBFnQT/IuLOX4Zn\nV09VtaFWWeMu+n/1+0mlbG0lE/2EEKKok+BfBOjSMyeiHGnhblQuf22GjhNRhsd/BnasZfK+CSGE\nKHgk+BcBNzcY0vkm2qppUiH7Z2Yf9vBEvxd8K5i0X0IIIQomCf5FwK1dfwKGS/51XeoYVff6rfsA\nvNWxlizdK4QQxYQE/yJAdz8ZgEPeJXB3MO7Z19MXbwJQU1L5CiFEsSEPdBdy+owMVP8+rNmhmnG5\np/V6PUkpWgCcStqYumtCiHxw7dpV3nyzDzVrGubspKWl8frrA2jZsvUz61NsbCw3b8bj5eXD/Pmf\n0KtXH6OT8jwsLu4GPXp0Yfr0ObRo0QowJOSZPDmQypWrApCRoWXo0JH4+dVl2bLFlC5d+qm5DYo7\nCf6F3PUfvwPgnq2ahuWNy+q3/fBlpaxWyyV/IQorD49KfPHFEgDu3r3DW2+9znPPPY+19bP5o/7Y\nscPcv5+Ml5cPo0YZl3PkcXbs2Iabmzs7d25Vgj8YEhlNnz4HMCzqM378KFau/CXP2ysOJPgXYnq9\nnrt7/gbggG8JhpfIPvvWAzqdnlV/Glbwa+ZT3iz9E0Lkv5IlS1G2rBMJCQloNBpmzZr2b359NRMm\nfED58uXp06c7NWrUonHjJlhYWLJu3RosLa2oXr0GY8dOICBgMLVrexMZeYrU1FSmTp1F+fIVWLx4\nISdOHEeny+DVV3vTvv1LxMZeY/r0YHQ6HeXLVyA4eBLffLMES0tLypUrz6pVKxg1ahyBge+ycuUv\nWFtbExp6lLVrVzFp0ofMnDmFe/fukZGRwejR4x+7Wt/27VsYM+Y9PvwwiPv372Nra/vId1xd3UhK\nSiIjIyM/hrnQk+BfiGXcvaOUE+pUMqru6n8DP0D/F2uarE9CFGfrzm0k9Ea4Sdus51KHV6vnPHHX\ntWtXuXv3Di4u5fj445n06fM6jRo1ISRkL9999zUTJkzi6tUYZs6cS9Wq1RgwoA9z5nxGuXLl+eOP\n9aSmpgCGPyIWLFjMzz+vYs2albRs2Ybr12NZuHApaWlpvP32G7Ro0YolSxbRp8/rNG/ekkWL5hMT\nE0PHjl0oXbo0zZu3ZNWqFVhYqGnYsDFHjx6madPm7N27m1at2rJmzU80adKUrl1f4cKF88yfP5fP\nPluUZX8uXbpIUlIijRo1oV69Buzdu5v27V96ZL9PnYrAxaVclgV9xJNJ8C/E7h05DMD5ihq6VX3R\nqLqnog0T/Xq3ro5GEvsIUahdumRYfhZAo9EwadIULC0tiYg4waVL0Xz33TJ0Oh2lSxsm9trY2FK1\najUA2rV7kaCg8bz4YkfatXtRuVXQqFFjAHx8fDlwYD/h4WGcPBmubEev1xEfH8+ZM5HKpf133hmF\ns7MDmzdvf6SPLVu2Yd++v2natDkHDx7A338IH3zwPrdv32Lr1k0Ayh8eD9u+fStt23YAoH37l9i0\naYMS/I8fP6b0p0SJEkyaNMUEo1k8SPAvxOJ+WgFATDkNHZyfvL7248TEJQHQoZG7yfslRHH1avUu\nRp2lm8rD9/wfZmlpxbRps3FycsryvpVV5j/9/fu/Rfv2Hdm1awcjRw5j4UJDOw+Wt32wHK6VlRVd\nurxM//5ZlwtXq9XodNkvEdOwYWMWLZpPVNQ5XF1dsbMrgZWVJWPGjMfHx/eJ9bZv34parWL//r3o\ndBlcvRrDvXuGdOQP3/MXxpFH/Qop/b+/mADHatmiVuX8R3k/VauUZaKfEEWXl5cPe/bsAuDo0cNs\n27Yly+c6nY7Fixfi
5OREnz5v4ONTh9jYWADCwo4DEBERTuXKVbMsEZyamsq8eYagW6uWF8eOGa5C\nfv31V+zfvx+1Wv3IvXeNRkO1ap6sXPk9rVq1Vfr399+G/l24cJ5Vq37MUuf06ZPY2dmxcuUvfPvt\nSr7/fjVt2rRn9+6dphukYkqCfyGlS00FIMlGzbiGI4yq+92WSADKOFibvF9CiILD338we/bsYvjw\nQSxfvhQfn6xJwNRqNXZ2JRgy5C1GjRqGSqXC09OwNsj167G8++4Itm/fQu/efalTx4969RowZMhb\nBAQMombN2v9uYwjr1/9GQMBgrl2LoUmTJvj41GHFiu/Ztm1zlu21bNmGXbt20rx5SwB69nyNmJjL\nvPPO/5g9ezp169bP8v3t27fQuXPXLO917tyNHTu2mXSciiNZ0reQunDoT9KXfM/FCho6THv0ct/T\nvP2RISPgiFfrUK+GczbfliU684OMcf6Qcc6ZgIDBvPvue1StWt3oujLG5idL+hZjYdvXAKCxszeq\n3pnLt5Wyb/WyJu2TEEKIwkEm/BVC2vQ0vC4YZsXWav2yUXU/WxsGQHPfClio5W8/IcSjHjd5UBQt\n8q9/IXR1V+aknXKNX8hxPb1eT0qaYRJO37aPJtIQQghRPEjwL4TubjNMdrnauDoqI87eV24/C0DV\niiWxtZaLPkIIUVxJ8C+ENLcSAUht2cioehdi7wLQqJaLyfskhBCi8JDgX8joUu4b/q+Ceu4NjKp7\n/qoh+Lfwq2jyfgkhhCg8JPgXMon/Jt5I1aiwt7LLcb3klHQALC1UcslfiCJkwYJ5BAQMpl+/Hrz6\namcCAgYTFDT+WXcLgHnz5ihJg3JjyZJF/Pbbz4BhztL774/jt99Ms2rfw21np3v3TqT+m1vFXPJj\nGw+TKFDI3N23F4BTVW3xsch5kp7NBy8BUNO9tFn6JYR4NkaMGAPApk0bOH8+ioCA0c+4R5nGjHnP\nZG0tWbKIihUr8sorPUzWZnEmwb+QSb5+FYCT1WyxUOdsQZ6klHT+CIkGoFHtnC/7K4Qo3BYsmMep\nUxFotVp69epLhw4vMWyYP3Xq+HLq1EnS09OZOnUW5cplLusdGXmKefM+xsrKCmtrG6ZOnUlGRgZT\np35AcnIy9vYOTJkyk/T0dGbNylyO9913J1ClSlVeeuklKlWqStOmzdmw4TcCAz9g69ZNpKamEB19\nkZiYK4wZ8x6NGz/H999/w59/7sDV1ZW0tDTeeOMt/PzqPrIf27dv4fz5c8ya9Yny3ooV37Fnz24y\nMrQ0b96SAQP8iY2NZfLkQDQaDb6+dTl5MoL58xc9djsPW7Toc06eDEeny6Bnz760bdv+seMZG3uN\niRPf4+OPPyM6+iJLl36JpaUl5ctX4L33JhIaepSff15FUlISAQGjmTr1A557rikREeGULl2a2bPn\nkZycxMyZU0hMTPx33HKXTCmvJPgXNgm3AHCt7JXjKgvXZS4x2ty3gsm7JIQwiFu7Sllt01QcGjbC\nuVcfo+sdPXqYK1cu8eWXy0hOTmbAgL688IIhrW7p0o588cUSVq9ewc8/r2b48FFKvY0b19Oz52u0\nb/8Shw8fJCEhgT/+WE/Tpi/w6qu9WLnyB44ePUxk5Cl8fevRt+8bRESE88UX8/jkkwVER0czY8bH\neHhUZsOG35R24+LimDv3c/bt28P69evw9KzJ77+vY+XKX7h37y59+776SFAGiIw8zZ9/7mDlyl9Q\nP/R0k1ptwaJFXwPQq1c3evfux6pVP9Khw0v07NmHBQs+BeDWrVtP3c6xY0e4desmCxcuJTU1BX//\nN3nhhZZoNJos/UhNTWXatMm8//5kypRx5N13R7BgwWIcHBxYsOBTdu/+k5IlS3H+/HlWrvwZlUrF\nlSuX6dLlFUaOHIu/f38uXIjir7920qxZCzp16sq5c2dZuPBzPvnkc6N/vnklwb8Q0d
42ZOfTWoCv\ns0+O6pyLuUPkJUO9if0boFbJQj5CFAeRkaeoW9cwKdjOzg4Pj0rExFwBMpfr9fb25YcfvslSr0WL\nVnz66Ryioy/Spk17PDwqceZMJO3aGZYN79evPwC//fYz//vfMAB8fOpw6ZLh6qKDgwMeHpUf6c+D\nM3oXFxcSExO5cuUS1at7Ym1tjbW1s7JWwH+dORNJr159WLRoPpMnT1Pe12isGD78f1hYWHL37h3u\n3btLdPQFOnY0rKrYrFkLzp07l+12wsPDCA8PU5YG1ukyuHkzgfLls54ozZkznVat2lK9uidxcTeI\nibnC++8bljK+f/8+Tk4ulCxZCk/PGlhZWaHVanFwcFCWTn6w3+HhYYSE7GPTpg0AaLVangUJ/oXI\n3QP7AbhcTkO9MtVyVGfuqlClXM21lFn6JYQwcO7VJ1dn6eagUql4eOkWrTZdWcUzcwlew3K9D2vc\n+DmWLv2O/fv3MH36ZEaOHItabYFer8vyPUM9Qzt6vR6dzpBAzMrK6rH9sbDIvE2p1xvqPHwm/99+\nPNCtW3defrkH48aN4o8/1tO5czdiYq7wyy9rWLbsR2xtbenXr4fSjwf7+KC97LZjZWVFt27d6dfv\nzcdu/wEXl3Js3ryR7t17YmlpRbly5R/JhHj48MEsyyVbWGQNsXq9YXtjx06gdm3jlmE3NZntX4gk\nnzoJwFkPa1zsnLL5tuGgT0s3/MLOH9ncrH0TQhQstWp5ERp6BICkpESuXbuKq6sbAGFhhpOCiIgT\nVK5cNUu9tWtXkZSUyIsvdqJnzz6cPXuG2rW9OHrU0Na6dWvZtm3zv0v5Gt47cSKMatVqGNW/ihVd\niYo6h1ar5ebNBM6ciXzid1UqFRMnBvPtt8uIjr7I7du3cXQsi62tLadORRAXF0d6ejqurm5ERp4G\n4MC/J0vZbefhpYpTUlL47LO5j+3DkCEBNGnyPN99t4wyZcqg1aZz6dJFANasWcn58+dytN8PL2N8\n/vw51qz5KUf1TE3O/AuRe2cjsQASKmcf+MFwyf8BBzvNU74phChq6tdvyIED+xk+fBBarZbhw0dj\nbW0DwLVrV3n33QCSkpKYMePjLPVcXd0IChqPg4MDGo2GoKAPsbCwYPr0YPbt+xt7e3uCg2eQkZHB\nrFlTGDlyKHq9nrFjA43qn5OTM61atWXw4AFUqlSF2rW9sbB48vmoo2NZRo8eR3BwEIsWfY2FhSXD\nhvlTt259unR5mU8+mc3o0WMJDg5ix45teHkZ2stuO3Xr1sfHx5chQ94C9PTo8doT+/DWW4MYPHgg\nLVq0YsKESUybNhkrKw3Ozi50796LhISEbPe7V6++zJw5hXfe+R96vc6kT0QYQ5b0LST0GRmcHeIP\nQMxkf1p7ZJ/T/8HSvdXdShH0hnEJgR4mS3San4xx/pBxhmHD/AkM/IBKlSqbpX1jxnjTpg106NAR\nlUrFm2++xueff0XZsjk7uXmcqKhz3L+fjI+PL1u2/EFERDjjxgWafDvPmimW9JUz/0Ii4ZQhuc8t\nB4scBf50beb9uTG9/MzWLyGEyK24uBsMGvQmVlYaOnbskueAbGtry9y5M1Gp1KjVaiZO/NAs2ykK\n5My/kPhtxlC8LqRwzNuePmO+yP77e86zft9FAL4JbJOnbcvZkvnJGOcPGWfzkzE2P1Oc+cuEv0Lg\n0r0reF1IAaB50545q3PdsPhP/w7GTcIRQghR9EnwLwQijmxXyq6NW+aozvFz8QD4VC1rlj4JIYQo\nvCT4FwLp+w4aCr5eT3wW9mF3k9KUslMpG3N1SwghRCElwb+Au516h0qxhpWe3LrmbEGL4OWHALCy\nVOfojwUhhBDFi1ln+8+cOZOwsDBUKhVBQUH4+voCcP36dcaNG6d87/Lly4wdO5b09HTmz5+Ph4cH\nAE2bNmXYsGHm7GKBF5t0A9sUw5xM2/8k43gcnV
7PnUTDmf+7vWWWvxBCiEeZLfgfOnSI6OhoVq9e\nTVRUFEFBQaxevRqAcuXK8cMPPwCGvMb9+/enTZs2bN26lU6dOjFhwgRzdavQOX3zDNWtVFinP5qG\n83G++i1CKdf0KGPOrgkhhCikzHbZPyQkhHbt2gFQrVo17ty5Q2Ji4iPf+/XXX3nxxRcpUaKEubpS\nqMUmxmKdrkddyT3b7+r1eo78EwfAiB51zN01IYQQhZTZgn98fDxlymSeeTo6OhIXF/fI99auXUvP\nnpmPrx06dAh/f38GDBjAqVOnzNW9QkGn15FwyZAv2jI1+5Wfdh+/qpTreTqbrV9CCCEKt3zL8Pe4\nXEKhoaFUrVoVe3t7APz8/HB0dKRVq1aEhoYyYcIENmzYkG3bpkh4UFDN9/8C/HP23V4datGrQy2z\n9KMoj3FBIWOcP2SczU/GuOAzW/B3cXEhPj5eeX3jxg2cnbOeje7atYvnn39eeV2tWjWqVTMsVVuv\nXj1u3rxJRkZGlqUghRBCCJE3Zrvs36xZM7Zu3QrAyZMncXFxUc7wHwgPD6dWrcwz1aVLl7Jx40YA\nzpw5g6OjowR+IYQQwsTMduZfv359vL296dOnDyqViuDgYNatW4eDgwPt27cHIC4ujrJlMzPQde3a\nlfHjx7Nq1Sq0Wi0zZswwV/eEEEKIYqtILOwjhBBCiJyTDH9CCCFEMSPBXwghhChm8u1Rv9x4Unpg\ngP379/Ppp59iYWFBixYtGD58eLZ1xOMZO84HDx5k1KhReHp6AlCjRg0++OCDZ9X9QuFpY5yamsrk\nyZM5e/Ys69aty1Ed8Shjx1iO49x52jgfOHCATz/9FLVaTZUqVZgxYwZqtVqOZSMZO8aHDx82/ljW\nF1AHDx7UDx48WK/X6/Xnzp3T9+7dO8vnHTt21F+9elWfkZGh79u3r/7s2bPZ1hGPys04HzhwQD9i\nxIhn0d1CKbsxnjp1qn758uX67t2757iOyCo3YyzHsfGyG+f27dvrr127ptfr9foRI0bod+3aJcey\nkXIzxrk5lgvsZf+npQe+fPkypUqVokKFCqjValq2bElISEiOUwqLTLkZZ2Gc7I7LMWPGKJ/ntI7I\nKjdjLIyX3TivW7eO8uXLA4asrrdu3ZJj2Ui5GePcKLDB/2npgePi4nB0dHzks5ymFBaZcjPOAOfO\nnWPo0KH07duXffv25W+nC5nsjsv/5r/ISR2RVW7GGOQ4NlZOx/nGjRvs27ePli1byrFspNyMMRh/\nLBfoe/4P0+fiicTc1CnucjJmlStXJiAggI4dO3L58mXefPNNtm3bhkajyYceFn5yLJufHMf543Hj\nnJCQwNChQwkODs4SxJ5WRzxZTsY4N8dygT3zf1p64P9+dv36dVxcXHKUUlhklZtxLleuHJ06dUKl\nUuHh4YGTkxPXr1/P974XFrk5LuVYNk5uxkuOY+NlN86JiYkMGjSI0aNH07x58xzVEVnlZoxzcywX\n2OD/tPTAbm5uJCYmcuXKFbRaLX/99RfNmjXLUUphkVVuxnn9+vUsW7YMMNwaSEhIoFy5cs9sHwq6\n3ByXciwbJzfjJcex8bIb548++ogBAwbQokWLHNcRWeVmjHNzLBfoDH9z587lyJEjSnrgU6dOKemB\nDx8+zNy5cwHo0KED/v7+j63z8NoB4vGMHefExETGjRvH3bt3SU9PJyAgQLnvJB7vaWM8cuRIYmNj\nOXv2LD4+PvTu3ZuuXbvKsWwkY8e4devWchznwpPGuXnz5jRq1Ih69eop3+3SpQuvvfaaHMtGMnaM\nO3fubPSxXKCDvxBCCCFMr8Be9hdCCCGEeUjwF0IIIYoZCf5CCCFEMSPBXwghhChmJPgLIYQQxYwE\nfyEKgCtXruDj40P//v2z/Hf69Okn1lmwYAHz5s3Lx14+2ZIlS9i1axcAGzZsQKfTAdC/f38yMjLy\npQ+7d+/m9u
3b+bItIQq7QpPeV4iiztHRkR9++OFZdyNXBg8erJQXLFhAx44dUavV+bo/3377LR9+\n+CGlS5fOt20KUVhJ8BeigIuKiiI4OBgLCwsSExMZPXo0L7zwgvK5Vqtl0qRJXLhwAZVKRe3atQkO\nDiYtLY2pU6cSHR1NUlISXbp04e23387S9rp169i+fTsqlYrr169TtWpVZs6ciZWVFYsWLWLXrl1Y\nWlri6enJpEmTSEtLY+zYsdy9exetVkvr1q0ZNmwYgYGBNGjQgGvXrhEdHc3AgQP54osvaNKkCSEh\nIXTq1Im///4bjUZDSkoKrVq1Ytu2bZw6dYqFCxei1+uxtLRk2rRpuLu7Z+ljmzZtlJzln3/+OfPn\nz1dWlyxfvjwff/wxa9eu5ciRI4wbN45Zs2ah1WqZPXs2Wq2W9PR0Jk+ejJeXl/l/WEIUEhL8hSjg\n4uPjGTVqFI0aNSI0NJRp06ZlCf5nzpwhLCyMzZs3A7BmzRru3bvH6tWrcXFxYfr06WRkZNC7d2+a\nNm36SHa18PBwtm3bhq2tLW+88QZ///03jo6ObNu2jbVr12JlZcXIkSPZuHEj9vb2aLVaVq5ciU6n\n44cfflAu8QOMHDmShQsX8u2332JpafjnpWTJktSvX589e/bQtm1bdu/eTePGjbGysiI4OJjVq1dT\nunRpduzYwZw5c1iwYMEjY1C5cmXGjx+PVqvF1taWlStXolar8ff3Z+/evfTr14+vv/6auXPnUqlS\nJbp27crChQvx8PAgMjKSoKAg1q1bZ44fjxCFkgR/IQqImzdv0r9//yzvzZ8/H2dnZ+bMmcO8efNI\nT09/5L52tWrVKFOmDIMGDaJ169Z07NgRBwcHDh48SGxsLIcPHwYgLS2NS5cuPRL869evj52dHQD1\n6tUjKiqKy5cv06hRI6ysrABo3Lgx4eHhDB8+nM8//5xRo0bRsmVLevXqhVqd/dShrl27snXrVtq2\nbcumTZvo1q0bZ8+eJS4ujhEjRgCQkZGBSqV6bP0H6UwtLS1Rq9X069cPS0tLzp8//8h65gkJCVy4\ncIGJEycq7yUmJqLT6XLUVyGKAwn+QhQQT7rnP3bsWDp37kzPnj05c+YMQ4cOzfK5tbU1K1eu5OTJ\nk/z111/07NmTn376CY1Gw/Dhw3nppZeeut2Hz9wfZPv+bxDW6/WoVCrKli3L77//TmhoKDt37qRH\njx78+uuv2e5bmzZtmD17Nnfu3OH48eN8/PHHnD9/nooVK+ZoXsCDP0KOHj3KL7/8wi+//IKdnR0j\nR4585LsajQYrK6tCO39CiPwgfwYLUcDFx8fj6ekJwKZNm0hLS8vyeXh4OL/++ive3t4EBATg7e3N\nxYsXadCggXIrQKfTMWvWrMfOhg8LC+P+/fvo9XqOHTtGzZo1qVu3LgcPHiQ9PR2AkJAQ/Pz82Lt3\nL7t27aJBgwa899572NnZkZCQkKU9lUqFVqvN8p61tTXPPfcc8+bNo3Xr1mg0GipXrsytW7c4c+YM\nAIcPH2b16tVPHYuEhARcXV2xs7MjJiaG48ePK+PxYLsODg64ubmxe/duAC5cuMAXX3yRo7EWoriQ\nM38hCri3336b9957Dzc3NwYOHMj27dv56KOPKFGiBAAeHh4sXLiQ1atXo9Fo8PDwoH79+vj5+XH2\n7Flee+01MjIyaNWq1WNnwteoUYP333+fK1eu4OnpSfPmzbGwsKBz5868/vrrqNVqvL296dKlC9eu\nXSMwMJCvv/4aCwsLmjdvjqura5b2XnjhBXr06MGXX36Z5f2uXbsyaNAgfvzxRwBsbGz4+OOPmThx\nItbW1gBMnTr1qWPRrFkzvvnmG/r27YunpycjRoxg4cKFNGnShObNmzN06FBmz57N7NmzmT59OkuW\nLEGr1RIYGJjr8ReiKJJV/YQoxtatW8f+/fuVZZuFEMWDXPYXQgghihk58xdC
CCGKGTnzF0IIIYoZ\nCf5CCCFEMSPBXwghhChmJPgLIYQQxYwEfyGEEKKYkeAvhBBCFDP/Bx/o1GuPpFCZAAAAAElFTkSu\nQmCC\n",
            "text/plain": [
              "<Figure size 576x396 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": []
          }
        }
      ]
    }
  ]
}


================================================
FILE: experiments/tf_trainer/tf_hub_tfjs/run.local.sh
================================================
#!/bin/bash
#
# Runs tf_trainer.tf_hub_tfjs.run locally for 9000 training steps, writing the
# model to ./tf_hub_tfjs_local_model_dir.
#
# NOTE(review): the relative paths below suggest this is meant to be launched
# from the `experiments/` directory - confirm.

# Presumably defines $train_path and $valid_path, which are used below.
source "tf_trainer/common/dataset_config.sh"

# The path expansions are quoted so that a value containing whitespace or glob
# characters is passed to the trainer as a single, unmodified argument.
python -m tf_trainer.tf_hub_tfjs.run \
  --train_path="$train_path" \
  --validate_path="$valid_path" \
  --model_dir="tf_hub_tfjs_local_model_dir" \
  --train_steps=9000 \
  --labels=toxicity,severe_toxicity,obscene,sexual_explicit,identity_attack,insult,threat


================================================
FILE: experiments/tf_trainer/tf_hub_tfjs/run.py
================================================
"""Experiments with Toxicity Dataset"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import tfrecord_input
from tf_trainer.tf_hub_tfjs import model as tf_hub_classifier

import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import sentencepiece as spm

FLAGS = tf.app.flags.FLAGS


class TFRecordWithSentencePiece(tfrecord_input.TFRecordInput):
  """Specialized sentencepiece based input preprocessor.

  Examples are parsed by the parent class; each batch then has its raw 'text'
  feature replaced by the components of a sparse tensor of sentence piece ids
  ('values', 'indices' and 'dense_shape').
  """

  def __init__(self, spm_path):
    """Initializes the parent input and loads the sentencepiece model.

    Args:
      spm_path: Path of the sentencepiece model file to load.
    """
    super().__init__()
    self._sp = spm.SentencePieceProcessor()
    self._sp.Load(spm_path)

  def dense_ids(self, texts):
    """Pads sentences ids out to max length, filling with 0's.

    Args:
      texts: Iterable of texts to encode with the sentencepiece model.

    Returns:
      A 2-D int64 numpy array with one row of ids per text.
    """
    encoded = [self._sp.EncodeAsIds(x) for x in texts]
    # Building a DataFrame from ragged rows leaves NaN in the missing trailing
    # cells, which are then replaced by 0.
    padded = pd.DataFrame(encoded).fillna(0).values
    # `pieces` declares tf.int64 as the py_func output type. A plain `int`
    # cast yields int32 on some platforms (e.g. Windows), which would not match,
    # so the width is pinned explicitly.
    return padded.astype('int64')

  def pieces(self, feature_dict, label_dict):
    """Processes a batch of texts into sentence pieces.

    Args:
      feature_dict: Batch features; must contain a 'text' entry. It is modified
        in place: 'text' is removed and 'values', 'indices' and 'dense_shape'
        are added.
      label_dict: Batch labels, returned untouched.

    Returns:
      The (feature_dict, label_dict) pair.
    """
    text = feature_dict.pop('text')
    # NOTE(review): dense_to_sparse drops entries equal to its eos_token
    # (0 unless overridden), which removes the padding added by dense_ids but
    # also any genuine id 0 - confirm that this is intended.
    sparse_ids = tf.contrib.layers.dense_to_sparse(
        tf.py_func(self.dense_ids, [text], tf.int64))
    feature_dict['values'] = sparse_ids.values
    feature_dict['indices'] = sparse_ids.indices
    feature_dict['dense_shape'] = sparse_ids.dense_shape
    return feature_dict, label_dict

  def _input_fn_from_file(self, filepath: str):
    """Builds the batched, sentence-piece encoded dataset for `filepath`.

    Args:
      filepath: File pattern of the TFRecord files to read.

    Returns:
      A dataset of (feature_dict, label_dict) batches as produced by `pieces`.
    """
    filenames_dataset = tf.data.Dataset.list_files(filepath)
    dataset = tf.data.TFRecordDataset(
        filenames_dataset)  # type: tf.data.TFRecordDataset
    # Use parent class parsing to obtain text features, and processed labels.
    parsed_dataset = dataset.map(self._read_tf_example)
    # Batch before tokenizing: `pieces` pads all texts of a batch together.
    return parsed_dataset.batch(self._batch_size).map(
        self.pieces).prefetch(self._num_prefetch)


def main(argv):
  """Trains and evaluates the TF-Hub classifier, then exports it.

  The sentencepiece model path is read from the 'spm_path' signature of the hub
  module named by FLAGS.model_spec. The exported model is fed the three
  components of a sparse id tensor: 'values', 'indices' and 'dense_shape'.

  Args:
    argv: Command line arguments; unused.
  """
  del argv  # unused

  hub_module = hub.Module(FLAGS.model_spec)
  with tf.Session() as session:
    sentencepiece_path = session.run(hub_module(signature='spm_path'))

  dataset = TFRecordWithSentencePiece(sentencepiece_path)
  classifier = tf_hub_classifier.TFHubClassifierModel(dataset.labels())

  trainer = model_trainer.ModelTrainer(dataset, classifier)
  trainer.train_with_eval()

  # Serving inputs mirror the features produced by
  # TFRecordWithSentencePiece.pieces.
  receiver_tensors = {
      'values': tf.placeholder(tf.int64, shape=[None], name='values'),
      'indices': tf.placeholder(tf.int64, shape=[None, 2], name='indices'),
      'dense_shape': tf.placeholder(
          tf.int64, shape=[None], name='dense_shape'),
  }
  serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
      receiver_tensors)
  trainer.export(serving_input_fn, None)


# Script entry point: enable INFO-level TensorFlow logging, then hand control to
# tf.app.run, which invokes main.
if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tf_trainer/tf_kona_prototypical_network/proto.py
================================================
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import sys
import datetime
import collections

# Command line flags.
#
# The three data flags below each point at a CSV file; their defaults are GCS
# locations.
tf.app.flags.DEFINE_string(
    "train_file",
    "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/train_cleaned_text.csv",
    "CSV file containing the training data. Expects columns: domain, label, support_or_query"
)
tf.app.flags.DEFINE_string(
    "validation_file",
    "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/validation_cleaned_text.csv",
    "CSV file containing the validation data. Expects columns: domain, label, support_or_query"
)
tf.app.flags.DEFINE_string(
    "test_file",
    "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/test_cleaned_text.csv",
    "CSV file containing the test data. Expects columns: domain, label, support_or_query"
)
# Switches between training (default) and evaluation on the test set.
tf.app.flags.DEFINE_boolean(
    "test_mode", False,
    "If true then no training occurs and it prints out metrics on the test set."
)
tf.app.flags.DEFINE_string("model_dir", "", "The model directory in GCS.")
# Sizes of the dense layers, given as a comma separated string such as
# "256,128".
tf.app.flags.DEFINE_string(
    "encoding_layers", "256,128",
    "Comma delimited integers representing the number of units for each dense layer."
)

# Module-level handle used to read the flag values defined above.
FLAGS = tf.app.flags.FLAGS


def distance(embeddings, prototype):
  """Returns the norm of `embeddings - prototype` for each embedding.

  Args:
    embeddings: Tensor whose entries along the first axis are the embeddings.
    prototype: Tensor subtracted from `embeddings` (with broadcasting).

  Returns:
    A tensor holding one `tf.norm` value per entry along the first axis.
  """
  offsets = embeddings - prototype
  # tf.norm is applied separately to each entry along the first axis.
  return tf.map_fn(tf.norm, offsets)


def neg_distance(embs, proto):
  """Returns the negated `distance` between each embedding and `proto`.

  Args:
    embs: Tensor of embeddings, one per entry along the first axis.
    proto: Prototype tensor the embeddings are compared against.

  Returns:
    The result of `distance(embs, proto)` with its sign flipped, so that
    embeddings closer to the prototype get larger values.
  """
  distances = distance(embs, proto)
  return -distances


def calculate_logits(embeddings, positive_prototype, negative_prototype):
  """Scores each embedding against the negative and positive prototypes.

  Args:
    embeddings: Tensor of embeddings, one per entry along the first axis.
    positive_prototype: Prototype tensor of the positive class.
    negative_prototype: Prototype tensor of the negative class.

  Returns:
    The negated distances to both prototypes stacked along axis 1, with the
    negative class at index 0 and the positive class at index 1.
  """
  # Index 0 is the negative class and index 1 the positive class.
  per_class_scores = [
      neg_distance(embeddings, negative_prototype),
      neg_distance(embeddings, positive_prototype),
  ]
  return tf.stack(per_class_scores, axis=1)


def prepare_dataset(data):
  """Builds a dataset with one element per domain found in `data`.

  Args:
    data: A pandas DataFrame with the columns text, domain, label (0 or 1) and
      support_or_query ("support" or "query"). It is not modified.

  Returns:
    A tf.data.Dataset whose elements are dicts with the keys positive_supports,
    negative_supports, positive_queries and negative_queries, each holding the
    texts of one domain.
  """
  # Fill missing texts on a copy so the caller's DataFrame is left untouched.
  data = data.assign(text=data.text.fillna(""))

  # Output key -> (label, support_or_query) values selecting its rows.
  selectors = {
      "positive_supports": (1, "support"),
      "negative_supports": (0, "support"),
      "positive_queries": (1, "query"),
      "negative_queries": (0, "query"),
  }
  episodes = {key: [] for key in selectors}

  # unique() keeps first-appearance order, so elements follow the order in
  # which domains occur in the input.
  for domain in data.domain.unique():
    domain_data = data[data["domain"] == domain]
    for key, (label, split) in selectors.items():
      matches = domain_data[(domain_data["label"] == label) &
                            (domain_data["support_or_query"] == split)]
      episodes[key].append(matches.text)

  # NOTE(review): presumably every domain contributes the same number of texts
  # per key; otherwise np.array cannot form a rectangular array -- confirm.
  return tf.data.Dataset.from_tensor_slices(
      {key: np.array(texts) for key, texts in episodes.items()})


def encoder(dense_config, output_types, output_shapes):
  """Tensorflow graph for getting prototypes and embeddings.

  It contains a placeholder called "handle" that is fed the string handle of a
  tensorflow Iterator whose elements are a dict containing negative_supports,
  positive_supports, negative_queries, and positive_queries. All of these are
  lists of strings.

  Args:
    dense_config: A non-empty list of integers that configure the dense layers.
    output_types: A dictionary from output name to its tf type.
    output_shapes: A dictionary from output name to its shape.

  Returns:
    A tuple (handle, negative_logits, positive_logits): the iterator handle
    placeholder, followed by the logits for the negative query set and the
    logits for the positive query set.

  Raises:
    ValueError: If dense_config is empty.
  """

  if not dense_config:
    raise ValueError("encoder must be called with a non empty dense_config")

  embed = hub.Module(
      "https://tfhub.dev/google/universal-sentence-encoder-large/3")
  dense_layers = [
      tf.keras.layers.Dense(units, activation=tf.nn.relu)
      for units in dense_config
  ]
  # Extra projection without activation, sized like the last configured layer.
  last_layer = tf.keras.layers.Dense(dense_config[-1], activation=None)

  # The same layer objects are applied to all four text groups below, so the
  # supports and the queries share one set of encoder weights.
  def get_embeddings(texts):
    result = embed(texts)
    for dense_layer in dense_layers:
      result = dense_layer(result)
    return last_layer(result)

  # A prototype is the mean embedding of a support set.
  get_prototype = lambda texts: tf.reduce_mean(get_embeddings(texts), 0)

  # Feeding a different iterator handle switches the input dataset without
  # rebuilding the graph.
  handle = tf.placeholder(tf.string, shape=[])
  iterator = tf.data.Iterator.from_string_handle(handle, output_types,
                                                 output_shapes)
  episode_batch = iterator.get_next()

  with tf.variable_scope("negative_prototype"):
    negative_prototype = get_prototype(episode_batch["negative_supports"])
  with tf.variable_scope("positive_prototype"):
    positive_prototype = get_prototype(episode_batch["positive_supports"])
  with tf.variable_scope("negative_embeddings"):
    negative_embeddings = get_embeddings(episode_batch["negative_queries"])
  with tf.variable_scope("positive_embeddings"):
    positive_embeddings = get_embeddings(episode_batch["positive_queries"])

  negative_logits = calculate_logits(negative_embeddings, positive_prototype,
                                     negative_prototype)
  positive_logits = calculate_logits(positive_embeddings, positive_prototype,
                                     negative_prototype)

  return handle, negative_logits, positive_logits


def train_operation(negative_logits, positive_logits):
  """Creates the episode loss and the op that minimizes it.

  Args:
    negative_logits: Two-column logits of the queries whose true class is 0.
    positive_logits: Two-column logits of the queries whose true class is 1.

  Returns:
    A (train_op, loss) tuple, where loss is the sum of the softmax
    cross-entropy losses of the two query groups and train_op is an Adam
    update with learning rate 0.001.
  """
  # Every negative query gets the one-hot target for class 0.
  negative_targets = tf.broadcast_to(
      tf.one_hot(0, 2), tf.shape(negative_logits))
  negative_loss = tf.losses.softmax_cross_entropy(negative_targets,
                                                  negative_logits)

  # Every positive query gets the one-hot target for class 1.
  positive_targets = tf.broadcast_to(
      tf.one_hot(1, 2), tf.shape(positive_logits))
  positive_loss = tf.losses.softmax_cross_entropy(positive_targets,
                                                  positive_logits)

  total_loss = negative_loss + positive_loss

  adam = tf.train.AdamOptimizer(0.001)
  train_step = adam.minimize(total_loss)
  return train_step, total_loss


def predictions_and_metrics(negative_logits, positive_logits):
  """Creates predictions plus streaming accuracy and AUC for one episode.

  Args:
    negative_logits: Two-column logits of the queries whose true class is 0.
    positive_logits: Two-column logits of the queries whose true class is 1.

  Returns:
    A tuple (predictions, acc_op, auc_op, update_acc_op, update_auc_op). The
    predictions list the negative queries first, then the positive ones.
  """
  negative_predictions = tf.argmax(negative_logits, axis=1)
  negative_labels = tf.fill(tf.shape(negative_predictions), 0)
  positive_predictions = tf.argmax(positive_logits, axis=1)
  positive_labels = tf.fill(tf.shape(positive_predictions), 1)

  # Stack both query groups, negatives first, to score them together.
  all_logits = tf.concat([negative_logits, positive_logits], -2)
  probability = tf.nn.softmax(all_logits, axis=-1)
  labels = tf.concat([negative_labels, positive_labels], -1)
  predictions = tf.concat([negative_predictions, positive_predictions], -1)

  acc_op, update_acc_op = tf.metrics.accuracy(labels, predictions)
  # AUC is computed from the probability assigned to class 1.
  positive_probability = tf.gather(probability, 1, axis=-1)
  auc_op, update_auc_op = tf.metrics.auc(labels, positive_probability)
  return predictions, acc_op, auc_op, update_acc_op, update_auc_op


def main():
  """Builds the prototypical network, then trains it or evaluates it.

  With --test_mode the latest checkpoint under <model_dir>/save is restored and
  accuracy and AUC over the test file are printed. Otherwise the model is
  trained on the train file and evaluated on the validation file after every
  32 training steps. A checkpoint is saved whenever the validation AUC
  improves, and training stops early once the validation AUC is lower than
  each of the three previously recorded values.
  """
  if FLAGS.model_dir:
    model_dir = FLAGS.model_dir
  else:
    st = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    model_dir = "gs://conversationai-models/jjtan/transfer_learning/model/" + st
  print("Model dir: " + model_dir)
  save_path = model_dir + "/save/model.ckpt"
  metadata_path = model_dir + "/meta.txt"

  with tf.gfile.Open(metadata_path, "w") as f:
    f.write("Encoding Layers: " + FLAGS.encoding_layers + "\n")

  # TODO(jjtan): Convert to flags.
  output_types = {
      "negative_queries": tf.string,
      "negative_supports": tf.string,
      "positive_queries": tf.string,
      "positive_supports": tf.string
  }
  output_shapes = {
      "negative_queries": tf.TensorShape([tf.Dimension(12)]),
      "negative_supports": tf.TensorShape([tf.Dimension(8)]),
      "positive_queries": tf.TensorShape([tf.Dimension(12)]),
      "positive_supports": tf.TensorShape([tf.Dimension(8)])
  }

  with tf.variable_scope("encoder"):
    encoding_units = [int(units) for units in FLAGS.encoding_layers.split(",")]
    handle, negative_logits, positive_logits = encoder(encoding_units,
                                                       output_types,
                                                       output_shapes)

  if FLAGS.test_mode:
    print("In TEST mode.")
    with tf.gfile.Open(FLAGS.test_file, "r") as f:
      test_df = pd.read_csv(f)
      print("Test Dataframe Shape: " + str(test_df.shape))
      test_ds = prepare_dataset(test_df).shuffle(64)

    # Test specific model components.
    with tf.variable_scope("test_predictions_and_metrics"):
      _, acc_op, auc_op, update_acc_op, update_auc_op = predictions_and_metrics(
          negative_logits, positive_logits)

    saver = tf.train.Saver()

    test_itr = test_ds.make_one_shot_iterator()
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.tables_initializer())
      sess.run(tf.initializers.local_variables())

      checkpoint = tf.train.latest_checkpoint(model_dir + "/save")
      saver.restore(sess, checkpoint)
      test_itr_handle = sess.run(test_itr.string_handle())
      # Accumulate the metrics over every test episode.
      while True:
        try:
          _, _ = sess.run([update_acc_op, update_auc_op],
                          feed_dict={handle: test_itr_handle})
        except tf.errors.OutOfRangeError:
          break
      test_acc, test_auc = sess.run([acc_op, auc_op])
      print("TEST ACCURACY: " + str(test_acc))
      print("TEST AUC: " + str(test_auc))
  else:
    print("In TRAINING mode.")

    with tf.gfile.Open(FLAGS.train_file, "r") as f:
      train_df = pd.read_csv(f)
      print("Train Dataframe Shape: " + str(train_df.shape))
      train_dataset = prepare_dataset(train_df).shuffle(128).repeat()

    with tf.gfile.Open(FLAGS.validation_file, "r") as f:
      validation_df = pd.read_csv(f)
      print("Validation Dataframe Shape: " + str(validation_df.shape))
      validation_dataset = prepare_dataset(validation_df).shuffle(64)

    # Training specific model components.
    with tf.variable_scope("training_operations"):
      train_op, loss_op = train_operation(negative_logits, positive_logits)
    with tf.variable_scope("train_predictions_and_metrics"):
      _, train_acc_op, train_auc_op, train_update_acc_op, train_update_auc_op = predictions_and_metrics(
          negative_logits, positive_logits)
    with tf.variable_scope("validation_predictions_and_metrics"):
      _, val_acc_op, val_auc_op, val_update_acc_op, val_update_auc_op = predictions_and_metrics(
          negative_logits, positive_logits)

    saver = tf.train.Saver()

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.tables_initializer())
      sess.run(tf.initializers.local_variables())

      train_writer = tf.summary.FileWriter(model_dir + "/train", sess.graph)
      validation_writer = tf.summary.FileWriter(model_dir + "/validation",
                                                sess.graph)

      training_iterator = train_dataset.make_one_shot_iterator()
      validation_iterator = validation_dataset.make_initializable_iterator()
      training_handle = sess.run(training_iterator.string_handle())
      validation_handle = sess.run(validation_iterator.string_handle())

      best_auc = 0
      batch_size = 32
      # Validation AUCs of the three most recent rounds. It is created once,
      # outside the loop, so it carries history from round to round; rebuilding
      # it every round would leave it empty at the early-stopping check, which
      # could then never fire.
      recent_aucs = collections.deque([], 3)
      for batch_num in range(500):
        print("Batch: " + str(batch_num))

        for i in range(batch_size):
          _, loss, train_acc, train_auc = sess.run(
              [train_op, loss_op, train_update_acc_op, train_update_auc_op],
              feed_dict={handle: training_handle})

          training_summary = tf.Summary(value=[
              tf.Summary.Value(tag="loss", simple_value=loss),
              tf.Summary.Value(tag="accuracy", simple_value=train_acc),
              tf.Summary.Value(tag="auc", simple_value=train_auc),
          ])
          train_writer.add_summary(training_summary, batch_num * batch_size + i)
          train_writer.flush()

        sess.run(validation_iterator.initializer)
        for _ in range(32):
          try:
            _, _ = sess.run([val_update_acc_op, val_update_auc_op],
                            feed_dict={handle: validation_handle})
          except tf.errors.OutOfRangeError:
            # The validation set holds fewer than 32 episodes.
            break
        # NOTE(review): the metric variables are initialized once above and
        # never reset, so these values accumulate over all rounds so far rather
        # than describing only the current round.
        val_acc, val_auc = sess.run([val_acc_op, val_auc_op])

        # Save best version
        if val_auc > best_auc:
          best_auc = val_auc
          saver.save(sess, save_path, global_step=(batch_num + 1) * batch_size)

        # Record the round before a possible early stop so that the final
        # round also shows up in the summaries.
        validation_summary = tf.Summary(value=[
            tf.Summary.Value(tag="accuracy", simple_value=val_acc),
            tf.Summary.Value(tag="auc", simple_value=val_auc),
        ])
        validation_writer.add_summary(validation_summary.SerializeToString(),
                                      (batch_num + 1) * batch_size)
        validation_writer.flush()

        # Early stopping
        if len(recent_aucs) >= 3 and all(
            val_auc < prev_auc for prev_auc in recent_aucs):
          break
        recent_aucs.append(val_auc)

      train_writer.close()
      validation_writer.close()


# main() takes no argv argument, so it is called directly instead of being
# passed to tf.app.run.
if __name__ == "__main__":
  main()


================================================
FILE: experiments/tf_trainer/tf_word_label_embedding/__init__.py
================================================


================================================
FILE: experiments/tf_trainer/tf_word_label_embedding/hparam_config.yaml
================================================
# Hyperparameter tuning configuration: up to 20 trials, 3 in parallel,
# maximizing the metric tagged auc/frac_neg.
# NOTE(review): dropout_rate and batch_size are tuned below, but the flags
# defined in this model's model.py are learning_rate, embedding_size and
# dense_units -- confirm the other two are defined elsewhere and take effect.
trainingInput:
  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 20
    maxParallelTrials: 3
    enableTrialEarlyStopping: TRUE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.0000005
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
        - 128
        - 256
      # Strings rather than integer lists: model.py splits the value on commas.
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
        - '128'
        - '128,128'
        - '128,128,128'
        - '64'
        - '64,64'
        - '64,64,64'


================================================
FILE: experiments/tf_trainer/tf_word_label_embedding/model.py
================================================
"""Tensorflow Estimator implementation of Word Label Embeddings."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
from tf_trainer.common import base_model
from typing import Set

FLAGS = tf.app.flags.FLAGS

# Hyperparameters
tf.app.flags.DEFINE_float('learning_rate', 0.000003,
                          'The learning rate to use during training.')
# Also used as the width of the learned class embeddings in the model below.
tf.app.flags.DEFINE_integer('embedding_size', 100,
                            'The number of dimensions in the word embedding.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
tf.app.flags.DEFINE_string(
    'dense_units', '128',
    'Comma delimited string for the number of hidden units in the dense layer.')


class TFWordLabelEmbeddingModel(base_model.BaseModel):
  """Binary classifier that attends over words using learned class embeddings.

  Two class embeddings are learned alongside the classifier. Each word is
  scored by its cosine similarity to the class embeddings, the scores are
  turned into attention weights over the sequence, and the attention-weighted
  sum of the word embeddings is classified by a stack of dense layers.
  """

  def __init__(self, target_label: str) -> None:
    """Stores the single label this model predicts.

    Args:
      target_label: A sequence holding exactly one label name.
        NOTE(review): the annotation says `str`, but the value is indexed as a
        one-element sequence -- confirm and tighten the annotation.

    Raises:
      ValueError: If `target_label` does not hold exactly one element.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(target_label) != 1:
      raise ValueError(
          'Only a single target label is supported, got: {!r}'.format(
              target_label))
    self._target_label = target_label[0]

  @staticmethod
  def hparams():
    """Returns the HParams built from the learning_rate, embedding_size and
    dense_units flags; dense_units is parsed into a list of ints."""
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        embedding_size=FLAGS.embedding_size,
        dense_units=dense_units)
    return hparams

  def estimator(self, model_dir):
    """Returns a tf.estimator.Estimator for this model rooted at `model_dir`."""
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model_fn.

    Args:
      features: Dict holding the word embedding sequence under
        base_model.TOKENS_FEATURE_KEY; assumed to be shaped
        [batch, sequence, embedding_size] -- TODO confirm.
      labels: Dict from label name to label tensor, or None when the Estimator
        supplies no labels (prediction and export).
      mode: A tf.estimator.ModeKeys value.
      params: HParams as returned by `hparams()`.
      config: The RunConfig; unused.

    Returns:
      The EstimatorSpec created by a binary classification head.
    """
    word_emb_seq = features[base_model.TOKENS_FEATURE_KEY]

    # Labels are absent in PREDICT mode; indexing None would raise TypeError.
    if labels is not None:
      labels = labels[self._target_label]

    # Class emb
    class_emb_initializer = tf.random_normal_initializer(
        mean=0.0, stddev=1.0, dtype=tf.float32)
    class_embs = tf.get_variable(
        'class_embs', [2, params.embedding_size],
        initializer=class_emb_initializer)

    word_emb_seq_norm = tf.nn.l2_normalize(word_emb_seq, axis=-1)
    class_embs_norm = tf.nn.l2_normalize(class_embs, axis=-1)

    # Dot products of L2-normalized vectors, i.e. cosine similarities between
    # every word and each of the two class embeddings.
    cosine_distance = tf.contrib.keras.backend.dot(
        word_emb_seq_norm, tf.transpose(class_embs_norm))
    cosine_distance = tf.expand_dims(cosine_distance, axis=-1)
    # Mix the scores of neighbouring positions (kernel height 5) along the
    # second axis, separately for each class column (kernel width 1).
    cosine_distance = tf.contrib.layers.conv2d(
        cosine_distance,
        num_outputs=1,
        kernel_size=[5, 1],
        padding='SAME',
        activation_fn=tf.nn.relu)
    cosine_distance = tf.squeeze(cosine_distance, axis=-1)

    # Keep the stronger of the two class scores per word, then normalize the
    # result into attention weights.
    max_cosine_distance = tf.reduce_max(cosine_distance, axis=-1)
    attention = tf.nn.softmax(max_cosine_distance, axis=-1)
    attention = tf.expand_dims(attention, axis=-1)

    weighted_word_emb = tf.reduce_sum(word_emb_seq * attention, axis=1)

    # The same dense stack classifies the text and both class embeddings.
    f2 = []
    for num_units in params.dense_units:
      f2.append(tf.layers.Dense(units=num_units, activation=tf.nn.relu))
    f2.append(tf.layers.Dense(units=1, activation=None))

    logits = weighted_word_emb
    for layer in f2:
      logits = layer(logits)

    # Regularizers: the class-0 embedding should itself be classified as 0 and
    # the class-1 embedding as 1.
    class_zero_logits = tf.expand_dims(class_embs[0, :], 0)
    for layer in f2:
      class_zero_logits = layer(class_zero_logits)
    class_zero_reg = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=[[0.0]], logits=class_zero_logits)

    class_one_logits = tf.expand_dims(class_embs[1, :], 0)
    for layer in f2:
      class_one_logits = layer(class_one_logits)
    class_one_reg = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=[[1.0]], logits=class_one_logits)

    # The loss can only be built when labels were supplied.
    loss = None
    if labels is not None:
      loss = tf.nn.sigmoid_cross_entropy_with_logits(
          labels=labels, logits=logits) + class_zero_reg + class_one_reg
    head = tf.contrib.estimator.binary_classification_head(
        name=self._target_label, loss_fn=lambda labels, logits: loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return head.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)


================================================
FILE: experiments/tf_trainer/tf_word_label_embedding/run.hyperparameter.sh
================================================
#!/bin/bash
# Submits a hyperparameter tuning job for tf_word_label_embedding to
# Cloud ML Engine.
#
# $1 is appended to the model name and selects the tuning config
# tf_trainer/tf_word_label_embedding/hparam_config_$1.yaml.
# NOTE(review): the only config visible next to this script is
# hparam_config.yaml -- confirm a matching hparam_config_<name>.yaml exists.
# NOTE(review): MODEL_PARENT_DIR, GCS_RESOURCES, train_path, valid_path,
# train_steps, eval_period, eval_steps, labels and label_dtypes are not set
# here; presumably the sourced dataset_config.sh provides them -- confirm.

source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_word_label_embedding"
MODEL_NAME_DATA="${MODEL_NAME}_$1"
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.10 \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --verbosity=debug \
    --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \
    -- \
    --train_path=$train_path \
    --validate_path=$valid_path \
    --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
    --embedding_size=300 \
    --model_dir="${JOB_DIR}/model_dir" \
    --is_embedding_trainable=False \
    --train_steps=$train_steps \
    --eval_period=$eval_period \
    --eval_steps=$eval_steps \
    --labels=$labels \
    --label_dtypes=$label_dtypes \
    --preprocess_in_tf=False

# Print where the job writes its model so it can be located afterwards.
echo "Model dir:"
echo ${JOB_DIR}/model_dir

================================================
FILE: experiments/tf_trainer/tf_word_label_embedding/run.local.sh
================================================
#!/bin/bash
# Runs tf_word_label_embedding training on the local machine, writing the
# model to ./tf_word_label_embedding_local_model_dir.
# NOTE(review): train_path, valid_path, labels and label_dtypes are not set
# here; presumably the sourced dataset_config.sh provides them -- confirm.
# No --embeddings_path is passed, so run.py falls back to its default.

source "tf_trainer/common/dataset_config.sh"

python -m tf_trainer.tf_word_label_embedding.run \
  --train_path=$train_path \
  --validate_path=$valid_path \
  --model_dir="tf_word_label_embedding_local_model_dir" \
  --labels=$labels \
  --label_dtypes=$label_dtypes

================================================
FILE: experiments/tf_trainer/tf_word_label_embedding/run.ml_engine.sh
================================================
#!/bin/bash
# This script runs one training job on Cloud MLE.

# Note:
# We currently use 2 different embeddings:
# - glove.6B/glove.6B.300d.txt
# - google-news/GoogleNews-vectors-negative300.txt
# Glove assumes all words are lowercased, while Google-news handles different casing.
# As there is currently no tf operation that performs lowercasing, we have the
# following requirements:
# - For google news: Run preprocess_in_tf=True (no lowercasing).
# - For glove.6B, Run preprocess_in_tf=False (will force lowercasing).
#
# NOTE(review): $1 only affects JOB_DIR (through MODEL_NAME_DATA); the job name
# below is built from MODEL_NAME, and the train/validate paths are hard-coded
# to the toxicity_q42017 files rather than taken from dataset_config.sh --
# confirm this is intended.

source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_word_label_embedding"
MODEL_NAME_DATA="${MODEL_NAME}_$1"
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME}_${USER}_${DATETIME} \
    --job-dir=${JOB_DIR} \
    --runtime-version=1.10 \
    --scale-tier 'BASIC_GPU' \
    --module-name="tf_trainer.${MODEL_NAME}.run" \
    --package-path=tf_trainer \
    --python-version "3.5" \
    --region=us-east1 \
    --verbosity=debug \
    -- \
    --train_path="${GCS_RESOURCES}/toxicity_q42017_train.tfrecord" \
    --validate_path="${GCS_RESOURCES}/toxicity_q42017_validate.tfrecord" \
    --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
    --embedding_size=300 \
    --preprocess_in_tf=False \
    --model_dir="${JOB_DIR}/model_dir" 


================================================
FILE: experiments/tf_trainer/tf_word_label_embedding/run.py
================================================
"""Experiments with Toxicity Dataset"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import nltk
import tensorflow as tf

from tf_trainer.common import base_model
from tf_trainer.common import model_trainer
from tf_trainer.common import serving_input
from tf_trainer.common import text_preprocessor
from tf_trainer.common import tfrecord_input
from tf_trainer.common import types
from tf_trainer.tf_word_label_embedding import model as tf_word_label_embedding

FLAGS = tf.app.flags.FLAGS

# Embeddings file handed to the TextPreprocessor in main().
tf.app.flags.DEFINE_string('embeddings_path',
                           'local_data/glove.6B/glove.6B.100d.txt',
                           'Path to the embeddings file.')


def main(argv):
  """Wires up preprocessing, input and model, then trains with evaluation.

  The embeddings at --embeddings_path back a TextPreprocessor, text is
  tokenized with nltk.word_tokenize, and the word-label-embedding model is
  wrapped with the embedding layer before being handed to the ModelTrainer.
  """
  del argv  # unused

  text_prep = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

  # word_tokenize needs the 'punkt' resource.
  nltk.download('punkt')
  tokenizing_input = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=text_prep.train_preprocess_fn(nltk.word_tokenize),
      max_seq_len=5000)

  label_model = tf_word_label_embedding.TFWordLabelEmbeddingModel(
      tokenizing_input.labels())
  embedded_model = text_prep.add_embedding_to_model(
      label_model, base_model.TOKENS_FEATURE_KEY)

  model_trainer.ModelTrainer(tokenizing_input, embedded_model).train_with_eval()


# Script entry point: switch TensorFlow logging to INFO, then hand control to
# tf.app.run, which invokes main.
if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)


================================================
FILE: experiments/tools/bert_tfrecord_converter.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Converts our TFRecord data into the format expected by the BERT model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import bert
from bert import run_classifier
import collections
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub

# Command-line flags. input_data_path and output_data_path are concatenated
# directly with each filename, so they need to end with a path separator.
tf.app.flags.DEFINE_string('input_data_path', None,
                           'Path to the input TFRecord files.')
tf.app.flags.DEFINE_string('output_data_path', None,
                           'Path to write the output TFRecord files.')
tf.app.flags.DEFINE_string('filenames', None,
                           'Comma separated list of filenames.')
tf.app.flags.DEFINE_string('text_key', 'comment_text',
                           'tf.Example key for text field in input TFRecord.')
tf.app.flags.DEFINE_string('label_key', 'toxicity',
                           'tf.Example key for label field in input TFRecord.')
tf.app.flags.DEFINE_string('bert_url', 'https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1', 'TF Hub URL for BERT Model')
tf.app.flags.DEFINE_integer('max_sequence_length', 128,
                            'Maximum sequence length of tokenized comment.')

FLAGS = tf.app.flags.FLAGS

def create_int_feature(values):
  """Wraps an iterable of ints in an int64 tf.train.Feature."""
  int64_values = tf.train.Int64List(value=list(values))
  return tf.train.Feature(int64_list=int64_values)

def create_tokenizer_from_hub_module(url):
  """Builds a BERT FullTokenizer from a TF Hub module.

  The module's "tokenization_info" signature supplies the vocabulary file and
  the lower-casing setting; both are evaluated in a throwaway graph.
  """
  with tf.Graph().as_default():
    module = hub.Module(url)
    info = module(signature="tokenization_info", as_dict=True)
    with tf.Session() as session:
      vocab_file, do_lower_case = session.run(
          [info["vocab_file"], info["do_lower_case"]])

  tokenizer = bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)
  return tokenizer


def convert_tfrecord_for_bert(filenames,
                              input_data_path,
                              output_data_path,
                              bert_tfhub_url,
                              text_key,
                              label_key,
                              max_seq_length):
  """Converts input TFRecords into the format expected by the BERT model.

  Args:
    filenames: Names of the files to convert. Each name is appended to both
      `input_data_path` and `output_data_path`.
    input_data_path: Prefix of the input files; concatenated with the filename
      as is, so it needs to end with a path separator.
    output_data_path: Prefix of the output files, handled like
      `input_data_path`.
    bert_tfhub_url: TF Hub URL of the BERT module that provides the tokenizer.
    text_key: tf.Example key of the bytes feature holding the text.
    label_key: tf.Example key of the float feature holding the label; it is
      rounded to the nearest integer class.
    max_seq_length: Maximum sequence length of the tokenized text.
  """
  tokenizer = create_tokenizer_from_hub_module(bert_tfhub_url)
  # The two classes a rounded label can take.
  label_list = [0, 1]
  for filename in filenames:
    print('Working on {}...'.format(filename))
    in_filepath = '{}{}'.format(input_data_path, filename)
    #TODO: Check if file exists, if not write new file
    #TODO: Have the filename reflect the max_sequence_length and path reflect model
    out_filepath = '{}{}'.format(output_data_path, filename)
    record_iterator = tf.python_io.tf_record_iterator(path=in_filepath)
    # The context manager closes the writer even when a record fails to
    # convert.
    with tf.python_io.TFRecordWriter(out_filepath) as writer:
      for ex_index, string_record in enumerate(record_iterator):
        example = tf.train.Example()
        example.ParseFromString(string_record)
        text = example.features.feature[text_key].bytes_list.value[0]
        label = example.features.feature[label_key].float_list.value[0]
        label = round(label)
        ex = run_classifier.InputExample(
            guid=None,  # Globally unique ID for bookkeeping
            text_a=text,
            text_b=None,
            label=label)
        feature = run_classifier.convert_single_example(
            ex_index, ex, label_list, max_seq_length, tokenizer)
        features = collections.OrderedDict()
        features["input_ids"] = create_int_feature(feature.input_ids)
        features["input_mask"] = create_int_feature(feature.input_mask)
        features["segment_ids"] = create_int_feature(feature.segment_ids)
        features["label_ids"] = create_int_feature([feature.label_id])
        features["is_real_example"] = create_int_feature(
            [int(feature.is_real_example)])

        tf_example = tf.train.Example(
            features=tf.train.Features(feature=features))
        writer.write(tf_example.SerializeToString())
    print('... Done!')

# Script entry point: converts every file named in --filenames.
# NOTE(review): --filenames defaults to None, in which case .split below raises
# AttributeError; the flag has to be supplied.
if __name__ == '__main__':
  filenames = [name.strip() for name in FLAGS.filenames.split(',')]
  convert_tfrecord_for_bert(filenames,
                            FLAGS.input_data_path,
                            FLAGS.output_data_path,
                            FLAGS.bert_url,
                            FLAGS.text_key,
                            FLAGS.label_key,
                            FLAGS.max_sequence_length)

================================================
FILE: experiments/tools/convert_csv_to_tfrecord.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A function to convert csvs to TFRecords."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

# Command-line flags. column_list and dtype_list are parallel comma separated
# lists; main() checks that they have the same number of entries.
tf.app.flags.DEFINE_string("input_csv_path", None,
                           "Path to the input csv file.")
tf.app.flags.DEFINE_string("output_tfrecord_path", None,
                           "Path where the output TFRecord should be written.")
tf.app.flags.DEFINE_string("column_list", None, 
                           "Comma seperated list of column names.")
tf.app.flags.DEFINE_string("dtype_list", None, 
                           "Comma seperated list of column dtypes. "
                           "Each entry should be one of [bytes,str,float,int]).")


def convert_csv_to_tfrecord(input_csv_path,
                            output_tfrecord_path,
                            column_names,
                            column_dtypes):
  """Writes the selected columns of a CSV file as tf.Examples to a TFRecord.

  Args:
    input_csv_path: Path of the CSV file to read.
    output_tfrecord_path: Path of the TFRecord file to write.
    column_names: Names of the CSV columns to copy into each example.
    column_dtypes: One entry per column name, each one of 'bytes', 'str',
      'float' or 'int'. 'str' values are converted with str() and encoded as
      UTF-8; the other values are appended unchanged.

  Raises:
    ValueError: If a dtype is not one of the supported names.
  """
  # Close the input handle as soon as the CSV has been parsed.
  with tf.gfile.Open(input_csv_path) as csv_file:
    df = pd.read_csv(csv_file)
  with tf.python_io.TFRecordWriter(output_tfrecord_path) as writer:
    for _, row in df.iterrows():
      example = tf.train.Example()
      for col_name, dtype in zip(column_names, column_dtypes):
        col_val = row[col_name]
        feature = example.features.feature[col_name]
        if dtype == 'bytes':
          feature.bytes_list.value.append(col_val)
        elif dtype == 'str':
          feature.bytes_list.value.append(
              str(col_val).encode("utf-8", errors="replace"))
        elif dtype == 'float':
          feature.float_list.value.append(col_val)
        elif dtype == 'int':
          feature.int64_list.value.append(col_val)
        else:
          raise ValueError('dtype must be one of bytes, str, float, int.')
      writer.write(example.SerializeToString())


def main(argv):
  """Converts the CSV at --input_csv_path into --output_tfrecord_path.

  Raises:
    ValueError: If --column_list and --dtype_list differ in length.
  """
  del argv  # unused

  input_csv_path = FLAGS.input_csv_path
  output_tfrecord_path = FLAGS.output_tfrecord_path
  column_names = FLAGS.column_list.split(',')
  column_dtypes = FLAGS.dtype_list.split(',')
  # Raise explicitly: an `assert` is stripped under `python -O`, after which
  # zip() would silently drop the unmatched columns.
  if len(column_names) != len(column_dtypes):
    raise ValueError(
        '--column_list has {} entries but --dtype_list has {}.'.format(
            len(column_names), len(column_dtypes)))

  convert_csv_to_tfrecord(input_csv_path,
                          output_tfrecord_path,
                          column_names,
                          column_dtypes)


# Script entry point: tf.app.run invokes main.
if __name__ == "__main__":
  tf.app.run(main)


================================================
FILE: experiments/tools/convert_jsonl_to_tfrecord.py
================================================
# coding=utf-8
# Copyright 2018 The Conversation-AI.github.io Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A function to convert jsonlines to TFRecords.

python tools/convert_jsonl_to_tfrecord.py \
 --input_jsonlines_path=tf_trainer/common/testdata/cats_and_dogs.jsonl \
 --text_fields_re='^(text)$' \
 --label_fields_re='^(bad)$' \
 --output_tfrecord_path=local_data/testdata/cats_and_dogs.recordio
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import flags
from absl import app
from absl import logging
import json
import jsonlines
import tensorflow as tf
import re

FLAGS = flags.FLAGS

# Every flag below defaults to None and has a validator requiring a string
# value. The validators of the two regexp flags also pass the value to
# re.compile.

# TODO: Compute basic stats for text fields and labels.
flags.DEFINE_string('text_fields_re', None,
                    'Matcher for names of the text fields.')
flags.register_validator(
    'text_fields_re',
    lambda value: isinstance(value, str) and re.compile(value),
    message='--text_fields_re must be a regexp string.')

flags.DEFINE_string('label_fields_re', None,
                    'Matcher for names of the label fields.')
flags.register_validator(
    'label_fields_re',
    lambda value: isinstance(value, str) and re.compile(value),
    message='--label_fields_re must be a regexp string.')

flags.DEFINE_string('input_jsonlines_path', None,
                    'Path to the JSON-lines input file.')
flags.register_validator(
    'input_jsonlines_path',
    lambda value: isinstance(value, str),
    message='--input_jsonlines_path must be a string.')

flags.DEFINE_string('output_tfrecord_path', None,
                    'Path where the output TFRecord should be written.')
flags.register_validator(
    'output_tfrecord_path',
    lambda value: isinstance(value, str),
    message='--output_tfrecord_path must be a string.')


class MisingAllTextFieldsError(Exception):
  """Raised when a row has no field matching --text_fields_re."""


class FieldsCounter():
  """Tallies how many times each field name has been seen."""

  def __init__(self):
    # Maps field name -> number of inc_field() calls made for that name.
    self.counters = {}

  def inc_field(self, field_name: str):
    """Adds one to the tally for `field_name`, starting from zero."""
    self.counters[field_name] = self.counters.get(field_name, 0) + 1


def make_selected_output_row(row, line, counters):
  """Selects the fields of `row` matched by the field regexp flags.

  Args:
    row: Dict for one input record, mapping field name to value.
    line: Line number of `row` in the input; only used in the error message.
    counters: FieldsCounter that is incremented once per selected field.

  Returns:
    A dict holding only the entries of `row` whose key matches
    --text_fields_re or --label_fields_re.

  Raises:
    MisingAllTextFieldsError: If no key of `row` matches --text_fields_re.
  """
  text_field_matcher = re.compile(FLAGS.text_fields_re)
  label_field_matcher = re.compile(FLAGS.label_fields_re)
  has_text_field = False
  output_row = {}
  for key, value in row.items():
    # A key matching both patterns is treated as a text field.
    if text_field_matcher.match(key):
      has_text_field = True
      counters.inc_field(key)
      output_row[key] = value
    elif label_field_matcher.match(key):
      counters.inc_field(key)
      output_row[key] = value
  if not has_text_field:
    # Name the flag exactly as it is defined so the message is actionable.
    raise MisingAllTextFieldsError(
        f'Error parsing file {FLAGS.input_jsonlines_path} at line: {line}.\n'
        f'No field matched by --text_fields_re="{FLAGS.text_fields_re}":\n'
        f'  {json.dumps(row, sort_keys=True, indent=2)}')
  return output_row


def itr_as_dict(input_jsonlines_path):
  """Yields the selected fields of every row of a JSON-lines file.

  Each row is filtered through make_selected_output_row(); once the file is
  exhausted, the per-field counts are logged.
  """
  with tf.gfile.Open(input_jsonlines_path) as jsonl_file:
    field_counts = FieldsCounter()
    rows = jsonlines.Reader(jsonl_file)
    # Line numbers are 1-based so they match what an editor shows.
    for line_number, row in enumerate(rows, start=1):
      yield make_selected_output_row(row, line_number, field_counts)
    logging.info(f'Complete.\nField Counts:\n'
                 f'{json.dumps(field_counts.counters, sort_keys=True, indent=2)}')


def itr_as_tfrecord(input_jsonlines_path):
  """Yields one tf.train.Example per selected row of the JSON-lines file.

  String values become UTF-8 `bytes_list` features and float/int values become
  `float_list` features; values of any other type are left out.
  """
  for row in itr_as_dict(input_jsonlines_path):
    example = tf.train.Example()
    for name, value in row.items():
      feature = example.features.feature
      if isinstance(value, str):
        encoded = value.encode('utf-8', errors='replace')
        feature[name].bytes_list.value.append(encoded)
        continue
      if isinstance(value, (float, int)):
        feature[name].float_list.value.append(value)
    yield example


def convert_to_tfrecord(input_jsonlines_path, output_tfrecord_path):
  """Writes every example built from the JSON-lines input to a TFRecord file."""
  with tf.python_io.TFRecordWriter(output_tfrecord_path) as record_writer:
    serialized_examples = (
        example.SerializeToString()
        for example in itr_as_tfrecord(input_jsonlines_path))
    for serialized in serialized_examples:
      record_writer.write(serialized)


def main(argv):
  """Entry point for app.run(): converts the file named by the path flags."""
  del argv  # unused
  source_path = FLAGS.input_jsonlines_path
  destination_path = FLAGS.output_tfrecord_path
  convert_to_tfrecord(source_path, destination_path)


if __name__ == '__main__':
  app.run(main)


================================================
FILE: hierarchical_attention_research/han_model/.gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject


================================================
FILE: hierarchical_attention_research/han_model/HAN_model.py
================================================
import tensorflow as tf
import tensorflow.contrib.layers as layers
import numpy as np
import data_util
from model_components import task_specific_attention, bidirectional_rnn


class HANClassifierModel():
  """ Implementation of document classification model described in

    `Hierarchical Attention Networks for Document Classification (Yang et al.,
    2016)`
    (https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf)

    Constructing an instance builds the whole graph: input placeholders, the
    embedding lookup, word- and sentence-level encoders with attention, the
    classifier, and the loss / accuracy / training ops. Use get_feed_data() to
    turn a batch of documents into a feed dict for those placeholders.
  """

  def __init__(self,
               vocab_size,
               embedding_size,
               classes,
               fw_word_cell,
               bw_word_cell,
               fw_sentence_cell,
               bw_sentence_cell,
               word_output_size,
               sentence_output_size,
               max_grad_norm,
               dropout_keep_proba,
               is_training=None,
               learning_rate=1e-4,
               device='/cpu:0',
               scope=None):
    """Builds the model graph.

    Args:
      vocab_size: Number of rows of the embedding matrix.
      embedding_size: Number of columns of the embedding matrix.
      classes: Number of outputs of the final fully connected layer.
      fw_word_cell: Forward RNN cell of the word-level encoder.
      bw_word_cell: Backward RNN cell of the word-level encoder.
      fw_sentence_cell: Forward RNN cell of the sentence-level encoder.
      bw_sentence_cell: Backward RNN cell of the sentence-level encoder.
      word_output_size: Output size of the word-level attention.
      sentence_output_size: Output size of the sentence-level attention.
      max_grad_norm: Clip norm passed to tf.clip_by_global_norm.
      dropout_keep_proba: `keep_prob` of both dropout layers.
      is_training: Optional tensor used as the dropout `is_training` switch;
        when None a boolean placeholder named 'is_training' is created.
      learning_rate: Learning rate of the Adam optimizer.
      device: Device string under which the model body is built.
      scope: Optional variable scope for the model; defaults to 'tcm'.
    """
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size
    self.classes = classes
    self.fw_word_cell = fw_word_cell
    self.bw_word_cell = bw_word_cell
    self.word_output_size = word_output_size
    self.fw_sentence_cell = fw_sentence_cell
    self.bw_sentence_cell = bw_sentence_cell
    self.sentence_output_size = sentence_output_size
    self.max_grad_norm = max_grad_norm
    self.dropout_keep_proba = dropout_keep_proba

    with tf.variable_scope(scope or 'tcm') as scope:
      # Incremented by `train_op` each time gradients are applied.
      self.global_step = tf.Variable(0, name='global_step', trainable=False)

      if is_training is not None:
        self.is_training = is_training
      else:
        self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')

      # [document]: per-example multiplier applied to the cross entropy.
      self.sample_weights = tf.placeholder(
          shape=(None,), dtype=tf.float32, name='sample_weights')

      # [document x sentence x word]
      self.inputs = tf.placeholder(
          shape=(None, None, None), dtype=tf.int32, name='inputs')

      # [document x sentence]
      self.word_lengths = tf.placeholder(
          shape=(None, None), dtype=tf.int32, name='word_lengths')

      # [document]
      self.sentence_lengths = tf.placeholder(
          shape=(None,), dtype=tf.int32, name='sentence_lengths')

      # [document]
      self.labels = tf.placeholder(shape=(None,), dtype=tf.int32, name='labels')

      # Dynamic sizes of the three input axes, used for the reshapes in
      # _init_body().
      (self.document_size, self.sentence_size, self.word_size) = tf.unstack(
          tf.shape(self.inputs))

      self._init_embedding(scope)

      # embeddings cannot be placed on GPU
      with tf.device(device):
        self._init_body(scope)

    with tf.variable_scope('train'):
      self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=self.labels, logits=self.logits)

      # Mean over the batch of the weighted per-example cross entropy.
      self.loss = tf.reduce_mean(
          tf.multiply(self.cross_entropy, self.sample_weights))
      tf.summary.scalar('loss', self.loss)

      # Fraction of examples whose label is the top-1 logit.
      self.accuracy = tf.reduce_mean(
          tf.cast(tf.nn.in_top_k(self.logits, self.labels, 1), tf.float32))
      tf.summary.scalar('accuracy', self.accuracy)

      tvars = tf.trainable_variables()

      # Clip the gradients of all trainable variables by their global norm.
      grads, global_norm = tf.clip_by_global_norm(
          tf.gradients(self.loss, tvars), self.max_grad_norm)
      tf.summary.scalar('global_grad_norm', global_norm)

      opt = tf.train.AdamOptimizer(learning_rate)

      self.train_op = opt.apply_gradients(
          zip(grads, tvars), name='train_op', global_step=self.global_step)

      self.summary_op = tf.summary.merge_all()

  def _init_embedding(self, scope):
    """Creates the embedding matrix and embeds `self.inputs`.

    Sets `self.embedding_matrix` ([vocab_size, embedding_size]) and
    `self.inputs_embedded` (the lookup of `self.inputs` in that matrix).
    """
    with tf.variable_scope(scope):
      with tf.variable_scope('embedding') as scope:
        self.embedding_matrix = tf.get_variable(
            name='embedding_matrix',
            shape=[self.vocab_size, self.embedding_size],
            initializer=layers.xavier_initializer(),
            dtype=tf.float32)
        self.inputs_embedded = tf.nn.embedding_lookup(self.embedding_matrix,
                                                      self.inputs)

  def _init_body(self, scope):
    """Builds the word encoder, sentence encoder and classifier.

    Sets `self.logits` and `self.prediction` (argmax over the last axis of
    the logits). Note that `scope` is rebound by the nested
    `tf.variable_scope(...) as scope` blocks below.
    """
    with tf.variable_scope(scope):

      # Fold documents and sentences into one batch axis so every sentence is
      # encoded as an independent word sequence.
      word_level_inputs = tf.reshape(self.inputs_embedded, [
          self.document_size * self.sentence_size, self.word_size,
          self.embedding_size
      ])
      word_level_lengths = tf.reshape(self.word_lengths,
                                      [self.document_size * self.sentence_size])

      with tf.variable_scope('word') as scope:
        word_encoder_output, _ = bidirectional_rnn(
            self.fw_word_cell,
            self.bw_word_cell,
            word_level_inputs,
            word_level_lengths,
            scope=scope)

        with tf.variable_scope('attention') as scope:
          word_level_output = task_specific_attention(
              word_encoder_output, self.word_output_size, scope=scope)

        with tf.variable_scope('dropout'):
          word_level_output = layers.dropout(
              word_level_output,
              keep_prob=self.dropout_keep_proba,
              is_training=self.is_training,
          )

      # sentence_level

      # Unfold the batch axis again: one sequence of sentence vectors per
      # document.
      sentence_inputs = tf.reshape(
          word_level_output,
          [self.document_size, self.sentence_size, self.word_output_size])

      with tf.variable_scope('sentence') as scope:
        sentence_encoder_output, _ = bidirectional_rnn(
            self.fw_sentence_cell,
            self.bw_sentence_cell,
            sentence_inputs,
            self.sentence_lengths,
            scope=scope)

        with tf.variable_scope('attention') as scope:
          sentence_level_output = task_specific_attention(
              sentence_encoder_output, self.sentence_output_size, scope=scope)

        with tf.variable_scope('dropout'):
          sentence_level_output = layers.dropout(
              sentence_level_output,
              keep_prob=self.dropout_keep_proba,
              is_training=self.is_training,
          )

      with tf.variable_scope('classifier'):
        # Linear layer (no activation) producing one logit per class.
        self.logits = layers.fully_connected(
            sentence_level_output, self.classes, activation_fn=None)

        self.prediction = tf.argmax(self.logits, axis=-1)

  def get_feed_data(self, x, y=None, class_weights=None, is_training=True):
    """Builds the feed dict for one batch.

    Args:
      x: Batch of documents, padded with data_util.batch().
      y: Optional labels, one per document. When omitted, neither `labels`
        nor `sample_weights` is fed.
      class_weights: Optional mapping indexed by label giving each example's
        sample weight; when omitted every example gets weight 1.
      is_training: Value fed for `self.is_training`.

    Returns:
      A dict mapping this model's placeholders to the batch values.
    """
    # data_util.batch() returns sentences-per-document and words-per-sentence,
    # which feed `sentence_lengths` and `word_lengths` respectively.
    x_m, doc_sizes, sent_sizes = data_util.batch(x)
    fd = {
        self.inputs: x_m,
        self.sentence_lengths: doc_sizes,
        self.word_lengths: sent_sizes,
    }
    if y is not None:
      fd[self.labels] = y
      if class_weights is not None:
        fd[self.sample_weights] = [class_weights[yy] for yy in y]
      else:
        fd[self.sample_weights] = np.ones(shape=[len(x_m)], dtype=np.float32)
    fd[self.is_training] = is_training
    return fd


if __name__ == '__main__':
  # Smoke test: build a tiny model, print its logits and run one train step.
  try:
    from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple, GRUCell
  except ImportError:
    LSTMCell = tf.nn.rnn_cell.LSTMCell
    LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple
    GRUCell = tf.nn.rnn_cell.GRUCell

  tf.reset_default_graph()
  with tf.Session() as session:
    model = HANClassifierModel(
        vocab_size=10,
        embedding_size=5,
        classes=2,
        fw_word_cell=GRUCell(10),
        bw_word_cell=GRUCell(10),
        fw_sentence_cell=GRUCell(10),
        bw_sentence_cell=GRUCell(10),
        word_output_size=10,
        sentence_output_size=10,
        max_grad_norm=5.0,
        dropout_keep_proba=0.5,
    )
    session.run(tf.global_variables_initializer())

    fd = {
        model.is_training: False,
        model.inputs: [[[5, 4, 1, 0], [3, 3, 6, 7], [6, 7, 0, 0]],
                       [[2, 2, 1, 0], [3, 3, 6, 7], [0, 0, 0, 0]]],
        model.word_lengths: [
            [3, 4, 2],
            [3, 4, 0],
        ],
        model.sentence_lengths: [3, 2],
        model.labels: [0, 1],
        # The loss multiplies the cross entropy by `sample_weights`, so
        # `train_op` cannot run unless this placeholder is fed; use uniform
        # weights.
        model.sample_weights: [1.0, 1.0],
    }

    print(session.run(model.logits, fd))
    session.run(model.train_op, fd)


================================================
FILE: hierarchical_attention_research/han_model/LICENSE
================================================
MIT License

Copyright (c) 2017 Matvey Ezhov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: hierarchical_attention_research/han_model/README.md
================================================
# Deep Text Classifier

Implementation of document classification model described in [Hierarchical Attention Networks for Document Classification (Yang et al., 2016)](https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf).

## How to run

1. Create a virtual environment, activate it, and install requirements:
```
python3 -m venv env
source env/bin/activate
pip install -r requirements.txt
```

2. Download the English model for spaCy:

```
python -m spacy download en
```

3. Get [Yelp review dataset](https://www.yelp.com/dataset_challenge) and extract it in this directory.
```
python3 yelp_prepare.py dataset/review.json
python3 worker.py --mode=train --device=/gpu:0 --batch-size=30
```

## Results
I am getting 65% accuracy on a dev set (16% of data) after 3 epochs. Results reported in the paper are 71% on Yelp'15.
No systematic hyperparameter optimization was performed.

================================================
FILE: hierarchical_attention_research/han_model/bn_lstm.py
================================================
# borrowed from https://github.com/OlavHN/bnlstm, updated for r1.0

import math
import numpy as np
import tensorflow as tf

try:
  from tensorflow.contrib.rnn import RNNCell
except ImportError:
  # Fallback for TensorFlow builds that do not ship `tensorflow.contrib.rnn`.
  RNNCell = tf.nn.rnn_cell.RNNCell


class LSTMCell(RNNCell):
  """Plain LSTM cell that shares BN-LSTM's weight initialization scheme."""

  def __init__(self, num_units):
    self.num_units = num_units

  @property
  def state_size(self):
    # The state is the pair (c, h), each of width num_units.
    return (self.num_units,) * 2

  @property
  def output_size(self):
    return self.num_units

  def __call__(self, x, state, scope=None):
    """Runs one LSTM step and returns `(new_h, (new_c, new_h))`."""
    with tf.variable_scope(scope or type(self).__name__):
      cell_state, hidden_state = state
      gate_width = 4 * self.num_units

      # W_xh and W_hh stay separate variables so each keeps its own
      # initializer; they are only joined for the matmul below.
      input_width = x.get_shape().as_list()[1]
      W_xh = tf.get_variable(
          'W_xh', [input_width, gate_width],
          initializer=orthogonal_initializer())
      W_hh = tf.get_variable(
          'W_hh', [self.num_units, gate_width],
          initializer=bn_lstm_identity_initializer(0.95))
      bias = tf.get_variable('bias', [gate_width])

      # Equivalent to matmul(x, W_xh) + matmul(h, W_hh) + bias, but done as a
      # single matmul over the concatenated operands for speed.
      stacked_inputs = tf.concat([x, hidden_state], 1)
      stacked_weights = tf.concat([W_xh, W_hh], 0)
      pre_activations = tf.matmul(stacked_inputs, stacked_weights) + bias

      in_gate, candidate, forget_gate, out_gate = tf.split(
          pre_activations, 4, axis=1)

      retained = cell_state * tf.sigmoid(forget_gate)
      written = tf.sigmoid(in_gate) * tf.tanh(candidate)
      new_c = retained + written
      new_h = tf.tanh(new_c) * tf.sigmoid(out_gate)

      return new_h, (new_c, new_h)


class BNLSTMCell(RNNCell):
  """LSTM cell with batch normalization (http://arxiv.org/abs/1603.09025)."""

  def __init__(self, num_units, training):
    self.num_units = num_units
    # Boolean tensor handed to batch_norm() to pick batch vs. population
    # statistics.
    self.training = training

  @property
  def state_size(self):
    # The state is the pair (c, h), each of width num_units.
    return (self.num_units,) * 2

  @property
  def output_size(self):
    return self.num_units

  def __call__(self, x, state, scope=None):
    """Runs one batch-normalized LSTM step.

    Returns `(new_h, (new_c, new_h))`; the carried cell state is the
    un-normalized `new_c`, its normalized version is only used for `new_h`.
    """
    with tf.variable_scope(scope or 'bn_lstm'):
      cell_state, hidden_state = state
      gate_width = 4 * self.num_units

      input_width = x.get_shape().as_list()[1]
      W_xh = tf.get_variable(
          'W_xh', [input_width, gate_width],
          initializer=orthogonal_initializer())
      W_hh = tf.get_variable(
          'W_hh', [self.num_units, gate_width],
          initializer=bn_lstm_identity_initializer(0.95))
      bias = tf.get_variable('bias', [gate_width])

      # Input and recurrent projections are normalized separately.
      input_projection = tf.matmul(x, W_xh)
      recurrent_projection = tf.matmul(hidden_state, W_hh)
      normed_input = batch_norm(input_projection, 'xh', self.training)
      normed_recurrent = batch_norm(recurrent_projection, 'hh', self.training)

      pre_activations = normed_input + normed_recurrent + bias

      in_gate, candidate, forget_gate, out_gate = tf.split(
          pre_activations, 4, axis=1)

      retained = cell_state * tf.sigmoid(forget_gate)
      written = tf.sigmoid(in_gate) * tf.tanh(candidate)
      new_c = retained + written
      normed_new_c = batch_norm(new_c, 'c', self.training)

      new_h = tf.tanh(normed_new_c) * tf.sigmoid(out_gate)

      return new_h, (new_c, new_h)


def orthogonal(shape):
  """Returns a random array of `shape` whose flattened 2-D form is orthonormal.

  A Gaussian matrix of shape (shape[0], prod(shape[1:])) is decomposed with a
  reduced SVD, and whichever factor has that flattened shape is reshaped back
  to `shape`.
  """
  flat_shape = (shape[0], np.prod(shape[1:]))
  gaussian = np.random.normal(0.0, 1.0, flat_shape)
  left, _, right = np.linalg.svd(gaussian, full_matrices=False)
  if left.shape == flat_shape:
    basis = left
  else:
    basis = right
  return basis.reshape(shape)


def bn_lstm_identity_initializer(scale):
  """Returns an initializer for the fused [size, 4 * size] recurrent weights.

  The second block of `size` columns is `scale` times the identity; the other
  three blocks are random orthogonal matrices.
  """

  def _initializer(shape, dtype=tf.float32, partition_info=None):
    """Ugly cause LSTM params calculated in one matrix multiply"""
    size = shape[0]
    weights = np.zeros(shape)
    # gate (j) is identity
    weights[:, size:size * 2] = np.identity(size) * scale
    # The remaining column blocks, filled in left-to-right order.
    orthogonal_blocks = (
        slice(None, size),
        slice(size * 2, size * 3),
        slice(size * 3, None),
    )
    for columns in orthogonal_blocks:
      weights[:, columns] = orthogonal([size, size])
    return tf.constant(weights, dtype=dtype)

  return _initializer


def orthogonal_initializer():
  """Returns an initializer that fills a variable with orthogonal(shape)."""

  def _initializer(shape, dtype=tf.float32, partition_info=None):
    values = orthogonal(shape)
    return tf.constant(values, dtype)

  return _initializer


def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999):
  """Batch-normalizes a 2d [batch, values] tensor.

  Args:
    x: Tensor whose second dimension is statically known.
    name_scope: Variable scope holding this layer's variables.
    training: Boolean tensor. When true, `x` is normalized with the statistics
      of the current batch and the population statistics are updated; when
      false, the stored population statistics are used and left untouched.
    epsilon: Variance epsilon passed to tf.nn.batch_normalization.
    decay: Decay of the moving averages that track the population statistics.

  Returns:
    The normalized tensor, with the same shape as `x`.
  """

  with tf.variable_scope(name_scope):
    size = x.get_shape().as_list()[1]

    scale = tf.get_variable(
        'scale', [size], initializer=tf.constant_initializer(0.1))
    offset = tf.get_variable('offset', [size])

    pop_mean = tf.get_variable(
        'pop_mean', [size], initializer=tf.zeros_initializer(), trainable=False)
    pop_var = tf.get_variable(
        'pop_var', [size], initializer=tf.ones_initializer(), trainable=False)
    batch_mean, batch_var = tf.nn.moments(x, [0])

    def batch_statistics():
      # The moving-average updates must be created inside this branch: ops
      # built outside a tf.cond branch function run whichever branch is
      # taken, which would let inference batches overwrite the population
      # statistics.
      train_mean_op = tf.assign(pop_mean,
                                pop_mean * decay + batch_mean * (1 - decay))
      train_var_op = tf.assign(pop_var,
                               pop_var * decay + batch_var * (1 - decay))
      with tf.control_dependencies([train_mean_op, train_var_op]):
        return tf.nn.batch_normalization(x, batch_mean, batch_var, offset,
                                         scale, epsilon)

    def population_statistics():
      return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale,
                                       epsilon)

    return tf.cond(training, batch_statistics, population_statistics)


================================================
FILE: hierarchical_attention_research/han_model/bn_lstm_test.py
================================================
import time
import uuid
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn import dynamic_rnn
from bn_lstm import LSTMCell, BNLSTMCell, orthogonal_initializer
from tensorflow.examples.tutorials.mnist import input_data

# Trains a single BN-LSTM layer on pixel-by-pixel MNIST and logs summaries
# for TensorBoard.
batch_size = 100
hidden_size = 100

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])
training = tf.placeholder(tf.bool)

# Each image becomes a sequence of 784 one-dimensional inputs.
x_inp = tf.expand_dims(x, -1)
lstm = BNLSTMCell(hidden_size, training) #LSTMCell(hidden_size)

#c, h
initialState = (
    tf.random_normal([batch_size, hidden_size], stddev=0.1),
    tf.random_normal([batch_size, hidden_size], stddev=0.1))

outputs, state = dynamic_rnn(lstm, x_inp, initial_state=initialState, dtype=tf.float32)

_, final_hidden = state

W = tf.get_variable('W', [hidden_size, 10], initializer=orthogonal_initializer())
b = tf.get_variable('b', [10])

y = tf.nn.softmax(tf.matmul(final_hidden, W) + b)

y_ = tf.placeholder(tf.float32, [None, 10])

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

optimizer = tf.train.AdamOptimizer()
gvs = optimizer.compute_gradients(cross_entropy)
# Clip every gradient element-wise to [-1, 1]; variables without a gradient
# keep None.
capped_gvs = [(None if grad is None else tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
train_step = optimizer.apply_gradients(capped_gvs)

correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Summaries
tf.summary.scalar("accuracy", accuracy)
tf.summary.scalar("xe_loss", cross_entropy)
for (grad, var), (capped_grad, _) in zip(gvs, capped_gvs):
    if grad is not None:
        tf.summary.histogram('grad/{}'.format(var.name), capped_grad)
        tf.summary.histogram('capped_fraction/{}'.format(var.name),
            tf.nn.zero_fraction(grad - capped_grad))
        tf.summary.histogram('weight/{}'.format(var.name), var)

# Use the `tf.summary` / r1.0 names, consistent with the summary calls above.
merged = tf.summary.merge_all()

init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

logdir = 'logs/' + str(uuid.uuid4())
os.makedirs(logdir)
print('logging to ' + logdir)
writer = tf.summary.FileWriter(logdir, sess.graph)

current_time = time.time()
print("Using population statistics (training: False) at test time gives worse results than batch statistics")

for i in range(100000):
    batch_xs, batch_ys = mnist.train.next_batch(batch_size)
    loss, _ = sess.run([cross_entropy, train_step], feed_dict={x: batch_xs, y_: batch_ys, training: True})
    step_time = time.time() - current_time
    current_time = time.time()
    if i % 100 == 0:
        # Every 100 steps, record summaries on a validation batch using the
        # population statistics.
        batch_xs, batch_ys = mnist.validation.next_batch(batch_size)
        summary_str = sess.run(merged, feed_dict={x: batch_xs, y_: batch_ys, training: False})
        writer.add_summary(summary_str, i)
    print(loss, step_time)


================================================
FILE: hierarchical_attention_research/han_model/data_util.py
================================================
import numpy as np


def batch(inputs):
  """Pads a batch of documents into dense, zero-padded int32 arrays.

  Args:
    inputs: Sequence of documents; each document is a sequence of sentences
      and each sentence a sequence of integer word ids.

  Returns:
    A tuple `(b, document_sizes, sentence_sizes)` where
      b: int32 array [batch, max sentences, max words] holding the word ids,
        padded with 0.
      document_sizes: int32 array [batch] with the number of sentences of
        each document.
      sentence_sizes: int32 array [batch, max sentences] with the number of
        words of each sentence (0 for padding sentences).
  """
  batch_size = len(inputs)

  document_sizes = np.array([len(doc) for doc in inputs], dtype=np.int32)
  # `default=0` keeps an empty batch, or a document without sentences, from
  # raising on max() of an empty sequence.
  document_size = max(document_sizes.tolist(), default=0)
  sentence_size = max(
      (len(sent) for doc in inputs for sent in doc), default=0)

  b = np.zeros(
      shape=[batch_size, document_size, sentence_size],
      dtype=np.int32)  # == PAD

  sentence_sizes = np.zeros(shape=[batch_size, document_size], dtype=np.int32)
  for i, document in enumerate(inputs):
    for j, sentence in enumerate(document):
      length = len(sentence)
      sentence_sizes[i, j] = length
      if length:
        # Copy the whole sentence at once instead of word by word.
        b[i, j, :length] = sentence

  return b, document_sizes, sentence_sizes


================================================
FILE: hierarchical_attention_research/han_model/model_components.py
================================================
import tensorflow as tf
import tensorflow.contrib.layers as layers

try:
  from tensorflow.contrib.rnn import LSTMStateTuple
except ImportError:
  LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple


def bidirectional_rnn(cell_fw,
                      cell_bw,
                      inputs_embedded,
                      input_lengths,
                      scope=None):
  """Bidirectional RNN with concatenated outputs and states.

  Returns `(outputs, state)`: the forward and backward outputs joined along
  axis 2, and the forward and backward final states joined along axis 1.
  """

  def concatenate_state(fw_state, bw_state):
    # LSTMStateTuple is checked first so it is not handled by the generic
    # tuple case further down.
    if isinstance(fw_state, LSTMStateTuple):
      merged_c = tf.concat((fw_state.c, bw_state.c),
                           1,
                           name='bidirectional_concat_c')
      merged_h = tf.concat((fw_state.h, bw_state.h),
                           1,
                           name='bidirectional_concat_h')
      return LSTMStateTuple(c=merged_c, h=merged_h)

    if isinstance(fw_state, tf.Tensor):
      return tf.concat((fw_state, bw_state), 1, name='bidirectional_concat')

    matching_tuples = (
        isinstance(fw_state, tuple) and isinstance(bw_state, tuple) and
        len(fw_state) == len(bw_state))
    if not matching_tuples:
      raise ValueError('unknown state type: {}'.format((fw_state, bw_state)))

    # multilayer: merge the states element by element
    return tuple(
        concatenate_state(fw, bw) for fw, bw in zip(fw_state, bw_state))

  with tf.variable_scope(scope or 'birnn') as scope:
    rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs_embedded,
        sequence_length=input_lengths,
        dtype=tf.float32,
        swap_memory=True,
        scope=scope)
    fw_outputs, bw_outputs = rnn_outputs
    fw_state, bw_state = rnn_states

    outputs = tf.concat((fw_outputs, bw_outputs), 2)
    state = concatenate_state(fw_state, bw_state)
    return outputs, state


def task_specific_attention(inputs,
                            output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh,
                            scope=None):
  """Reduces the `units` axis of `inputs` with learned attention weights.

    Every unit is projected to `output_size` features, scored against a
    learned context vector, and the projections are summed using the softmax
    of those scores over the `units` axis.

    Args:
        inputs: Tensor of shape [batch_size, units, input_size]; `input_size`
          must be statically known.
        output_size: Size of the projection and of the output's last axis.
        initializer: Initializer of the attention context vector.
        activation_fn: Activation of the input projection.
        scope: Optional variable scope; defaults to 'attention'.

    Returns:
        outputs: Tensor of shape [batch_size, output_size].
    """
  assert len(
      inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None

  with tf.variable_scope(scope or 'attention') as scope:
    context = tf.get_variable(
        name='attention_context_vector',
        shape=[output_size],
        initializer=initializer,
        dtype=tf.float32)
    projected = layers.fully_connected(
        inputs, output_size, activation_fn=activation_fn, scope=scope)

    # One score per unit: dot product of its projection with the context.
    scores = tf.reduce_sum(
        tf.multiply(projected, context), axis=2, keep_dims=True)
    weights = tf.nn.softmax(scores, dim=1)

    attended = tf.multiply(projected, weights)
    return tf.reduce_sum(attended, axis=1)


================================================
FILE: hierarchical_attention_research/han_model/requirements.txt
================================================
cymem==1.31.2
cytoolz==0.8.2
dill==0.2.7.1
en-core-web-sm==2.0.0
msgpack-numpy==0.4.1
msgpack-python==0.5.6
murmurhash==0.28.0
numpy==1.22.0
pathlib==1.0.1
plac==0.9.6
preshed==1.0.0
regex==2017.4.5
six==1.11.0
spacy==2.0.11
termcolor==1.1.0
thinc==6.10.2
toolz==0.9.0
tqdm==4.22.0
ujson==5.4.0
wrapt==1.10.11


================================================
FILE: hierarchical_attention_research/han_model/worker.py
================================================
#!/usr/bin/env python3
import argparse
# Command-line options. They are parsed here, at import time and ahead of the
# remaining imports of this module.
parser = argparse.ArgumentParser()
# Name of the task module that is imported to supply the data.
parser.add_argument('--task', default='yelp', choices=['yelp'])
parser.add_argument('--mode', default='train', choices=['train', 'eval'])
# Number of steps between checkpoints / dev-set evaluations during training.
parser.add_argument('--checkpoint-frequency', type=int, default=100)
parser.add_argument('--eval-frequency', type=int, default=10000)
parser.add_argument('--batch-size', type=int, default=30)
# Device string handed to the model, e.g. '/cpu:0' or '/gpu:0'.
parser.add_argument('--device', default='/cpu:0')
parser.add_argument('--max-grad-norm', type=float, default=5.0)
parser.add_argument('--lr', type=float, default=0.001)
args = parser.parse_args()

import importlib
import os
import pickle
import random
import time
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
import spacy
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from tqdm import tqdm

import ujson
from data_util import batch

task_name = args.task

# The task module supplies the dataset readers, vocabulary and `train_dir`.
task = importlib.import_module(task_name)

tflog_dir = os.path.join(task.train_dir, 'tflog')
checkpoint_name = task_name + '-model'
checkpoint_dir = os.path.join(task.train_dir, 'checkpoints')
checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name)

# @TODO: move calculation into `task file`
# Class weights are inversely proportional to class frequency:
# weight = mean class count / class count.
trainset = task.read_trainset(epochs=1)
class_weights = pd.Series(Counter(label for _, label in trainset))
class_weights = 1 / (class_weights / class_weights.mean())
class_weights = class_weights.to_dict()

vocab = task.read_vocab()
labels = task.read_labels()

classes = max(labels.values()) + 1
vocab_size = task.vocab_size

# Reverse lookups (id -> name) used to print examples in readable form.
labels_rev = {int(v): k for k, v in labels.items()}
vocab_rev = {int(v): k for k, v in vocab.items()}

# Number of stacked cells in each of the four RNN directions.
NUM_RNN_LAYERS = 5


def HAN_model_1(session, restore_only=False):
  """Hierarchical Attention Network.

  Builds the model in the default graph, then either restores its variables
  from the latest checkpoint in `checkpoint_dir` or initializes them afresh.
  With `restore_only` set, a missing checkpoint raises FileNotFoundError.
  Returns `(model, saver)`.
  """
  import tensorflow as tf
  try:
    from tensorflow.contrib.rnn import GRUCell, MultiRNNCell, DropoutWrapper
  except ImportError:
    MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell
    GRUCell = tf.nn.rnn_cell.GRUCell
  from bn_lstm import BNLSTMCell
  from HAN_model import HANClassifierModel

  is_training = tf.placeholder(dtype=tf.bool, name='is_training')

  def stacked_bn_cell():
    # A stack of h-h batchnorm LSTM cells.
    layers = [BNLSTMCell(80, is_training) for _ in range(NUM_RNN_LAYERS)]
    return MultiRNNCell(layers)

  # Separate stacks for each direction of the word and sentence encoders.
  word_cells = (stacked_bn_cell(), stacked_bn_cell())
  sentence_cells = (stacked_bn_cell(), stacked_bn_cell())

  model = HANClassifierModel(
      vocab_size=vocab_size,
      embedding_size=200,
      classes=classes,
      fw_word_cell=word_cells[0],
      bw_word_cell=word_cells[1],
      fw_sentence_cell=sentence_cells[0],
      bw_sentence_cell=sentence_cells[1],
      word_output_size=100,
      sentence_output_size=100,
      device=args.device,
      learning_rate=args.lr,
      max_grad_norm=args.max_grad_norm,
      dropout_keep_proba=0.5,
      is_training=is_training,
  )

  saver = tf.train.Saver(tf.global_variables())
  checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
  if not checkpoint:
    if restore_only:
      raise FileNotFoundError('Cannot restore model')
    print('Created model with fresh parameters')
    session.run(tf.global_variables_initializer())
    return model, saver

  print('Reading model parameters from %s' % checkpoint.model_checkpoint_path)
  saver.restore(session, checkpoint.model_checkpoint_path)
  return model, saver


model_fn = HAN_model_1


def decode(ex):
  """Prints the text and label of an `(sentences, label)` example.

  Word ids missing from `vocab_rev` are printed as '<?>'.
  """
  rendered_sentences = []
  for sent in ex[0]:
    words = [vocab_rev.get(wid, '<?>') for wid in sent]
    rendered_sentences.append(' '.join(words))
  print('text: ' + '\n'.join(rendered_sentences))
  print('label: ', labels_rev[ex[1]])


print('data loaded')


def batch_iterator(dataset, batch_size, max_epochs):
  """Yields `(inputs, labels)` list pairs of exactly `batch_size` examples.

  `dataset` is iterated `max_epochs` times. Examples left over at the end of
  a pass that do not fill a whole batch are dropped.
  """
  for _ in range(max_epochs):
    inputs, targets = [], []
    for x, y in dataset:
      inputs.append(x)
      targets.append(y)
      if len(inputs) != batch_size:
        continue
      yield inputs, targets
      # Start fresh lists so batches already handed out are not mutated.
      inputs, targets = [], []


def ev(session, model, dataset):
  """Runs the model over `dataset` once and collects its predictions.

  Returns a DataFrame with one row per evaluated example and the columns
  'predictions', 'labels' and 'examples'.
  """
  predicted, gold, seen = [], [], []
  for xs, ys in tqdm(batch_iterator(dataset, args.batch_size, 1)):
    seen.extend(xs)
    gold.extend(ys)
    feed = model.get_feed_data(xs, is_training=False)
    predicted.extend(session.run(model.prediction, feed))

  return pd.DataFrame({
      'predictions': predicted,
      'labels': gold,
      'examples': seen
  })


def evaluate(dataset):
  """Restores the model, prints its accuracy on `dataset`, then opens IPython.

  The model is built with `restore_only=True`, so it is only ever loaded from
  an existing checkpoint.
  """
  tf.reset_default_graph()
  config = tf.ConfigProto(allow_soft_placement=True)
  with tf.Session(config=config) as s:
    model, _ = model_fn(s, restore_only=True)
    df = ev(s, model, dataset)
  # Accuracy: fraction of rows whose prediction equals the label.
  print((df['predictions'] == df['labels']).mean())
  # Drop into an interactive shell so the results in `df` can be inspected.
  import IPython
  IPython.embed()


def train():
  """Runs the training loop with periodic checkpointing and dev evaluation.

  A fresh default graph and session are created, the model is built (or
  restored from the latest checkpoint) via model_fn, and batches drawn from
  task.read_trainset() are fed to the train op. Summaries are written to
  tflog_dir on every step; a checkpoint is saved every
  args.checkpoint_frequency global steps and the dev set is scored every
  args.eval_frequency global steps.
  """
  tf.reset_default_graph()

  config = tf.ConfigProto(allow_soft_placement=True)

  with tf.Session(config=config) as s:
    model, saver = model_fn(s)
    summary_writer = tf.summary.FileWriter(
        tflog_dir, graph=tf.get_default_graph())

    # Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto
    # pconf = projector.ProjectorConfig()

    # # You can add multiple embeddings. Here we add only one.
    # embedding = pconf.embeddings.add()
    # embedding.tensor_name = m.embedding_matrix.name

    # # Link this tensor to its metadata file (e.g. labels).
    # embedding.metadata_path = vocab_tsv

    # print(embedding.tensor_name)

    # Saves a configuration file that TensorBoard will read during startup.

    try:
      for x, y in batch_iterator(
          task.read_trainset(epochs=3), args.batch_size, 300):
        fd = model.get_feed_data(x, y, class_weights=class_weights)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # supported high-resolution timer for measuring a step's duration.
        t0 = time.perf_counter()
        step, summaries, loss, accuracy, _ = s.run([
            model.global_step,
            model.summary_op,
            model.loss,
            model.accuracy,
            model.train_op,
        ], fd)
        td = time.perf_counter() - t0

        summary_writer.add_summary(summaries, global_step=step)
        # projector.visualize_embeddings(summary_writer, pconf)

        print('step %s, loss=%s, accuracy=%s, t=%s, inputs=%s' %
              (step, loss, accuracy, round(td, 2), fd[model.inputs].shape))
        if step != 0 and step % args.checkpoint_frequency == 0:
          print('checkpoint & graph meta')
          saver.save(s, checkpoint_path, global_step=step)
          print('checkpoint done')
        if step != 0 and step % args.eval_frequency == 0:
          # Report the global step (the value the trigger is based on); the
          # batch index differs from it after restoring from a checkpoint.
          print('evaluation at step %s' % step)
          dev_df = ev(s, model, task.read_devset(epochs=1))
          print('dev accuracy: %.2f' %
                (dev_df['predictions'] == dev_df['labels']).mean())
    finally:
      # Flush pending summaries even if training is interrupted or fails.
      summary_writer.close()


def main():
  """Dispatches on args.mode: 'train' trains, 'eval' scores the dev set.

  Any other mode is a no-op.
  """
  mode = args.mode
  if mode == 'eval':
    evaluate(task.read_devset(epochs=1))
    return
  if mode == 'train':
    train()


if __name__ == '__main__':
  main()


================================================
FILE: hierarchical_attention_research/han_model/yelp.py
================================================
import os
import pickle

# Working directories, relative to the current working directory.
train_dir = os.path.join(os.path.curdir, 'yelp')
data_dir = os.path.join(train_dir, 'data')

# Create both directories at import time. exist_ok avoids the check-then-create
# race, and the underscore-prefixed loop name neither shadows the builtin
# `dir` nor leaks into modules that do `from yelp import *`.
for _path in (train_dir, data_dir):
  os.makedirs(_path, exist_ok=True)

# Paths of the train/dev/test split files and the pickled vocabulary, all
# inside data_dir. The train and dev files are read by _read_dataset() as a
# stream of pickled (x, y) records.
trainset_fn = os.path.join(data_dir, 'train.dataset')
devset_fn = os.path.join(data_dir, 'dev.dataset')
testset_fn = os.path.join(data_dir, 'test.dataset')
vocab_fn = os.path.join(data_dir, 'vocab.pickle')

# NOTE(review): not referenced in this module; presumably the count of low
# word ids kept for special tokens — confirm against the consumers.
reserved_tokens = 5
# Presumably the id that stands for out-of-vocabulary words — confirm against
# the data preparation script.
unknown_id = 2

# Presumably one more than the largest word id in the vocabulary — confirm.
vocab_size = 50001


def _read_dataset(fn, review_max_sentences=30, sentence_max_length=30,
                  epochs=1):
  c = 0
  while 1:
    c += 1
    if epochs > 0 and c > epochs:
      return
    print('epoch %s' % c)
    with open(fn, 'rb') as f:
      try:
        while 1:
          x, y = pickle.load(f)

          # clip review to specified max lengths
          x = x[:review_max_sentences]
          x = [sent[:sentence_max_length] for sent in x]

          y -= 1
          assert y >= 0 and y <= 4
          yield x, y
      except EOFError:
        continue


def read_trainset(epochs=1):
  """Returns a generator over the training split, repeated `epochs` times."""
  return _read_dataset(fn=trainset_fn, epochs=epochs)


def read_devset(epochs=1):
  """Returns a generator over the dev split, repeated `epochs` times."""
  return _read_dataset(fn=devset_fn, epochs=epochs)


def read_vocab():
  """Loads and returns the object pickled at vocab_fn."""
  with open(vocab_fn, 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)
  return vocab


def read_labels():
  """Returns the identity mapping over the five label ids 0..4."""
  label_ids = range(5)
  return dict(zip(label_ids, label_ids))


================================================
FILE: hierarchical_attention_research/han_model/yelp_prepare.py
================================================
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('review_path')
args = parser.parse_args()

import os
import ujson as json
import spacy
import pickle
import random
from tqdm import tqdm
from collections import defaultdict
import numpy as np
from yelp import *

en = spacy.load('en')


def read_reviews():
  """Yields one decoded JSON object per line of the file at args.review_path."""
  with open(args.review_path, 'rb') as review_file:
    for raw_line in review_file:
      record = json.loads(raw_line)
      yield record


def build_word_frequency_distribution():
  """Returns a token -> occurrence-count mapping over all reviews.

  The result is cached at <data_dir>/word_freq.pickle and loaded from there
  when the file can be opened. Otherwise every review is tokenized with the
  spaCy tokenizer and the counts are accumulated.

  The cache is written exactly once, after the full pass, via a temporary file
  that is renamed into place. Writing intermediate snapshots to the cache path
  (and never writing the final counts) would let an interrupted or even a
  completed run leave a partial distribution that later runs load silently.

  Returns:
    A dict-like object mapping token text to its count.
  """
  path = os.path.join(data_dir, 'word_freq.pickle')

  try:
    with open(path, 'rb') as freq_dist_f:
      freq = pickle.load(freq_dist_f)
  except IOError:
    # No readable cache yet: fall through and build it.
    pass
  else:
    print('frequency distribution loaded')
    return freq

  print('building frequency distribution')
  freq = defaultdict(int)
  for i, review in enumerate(read_reviews()):
    doc = en.tokenizer(review['text'])
    for token in doc:
      freq[token.orth_] += 1
    if i % 10000 == 0:
      print('processed {} reviews'.format(i))

  # Publish the complete counts atomically so readers never see a partial file.
  tmp_path = path + '.tmp'
  with open(tmp_path, 'wb') as freq_dist_f:
    pickle.dump(freq, freq_dist_f)
  os.replace(tmp_path, path)
  print('dump at {}'.format(path))
  return freq


def build_vocabulary(lower=3, n=50000):
  """Returns a word -> id mapping for the most frequent words.

  The mapping is loaded from vocab_fn when that file can be opened. Otherwise
  it is built from the word frequency distribution: the n - lower + 1 most
  frequent words receive consecutive ids starting at `lower`, and the result
  is pickled to vocab_fn.

  Args:
    lower: First id to assign.
    n: Largest id that may be assigned.

  Returns:
    Dict mapping word text to integer id.
  """
  try:
    with open(vocab_fn, 'rb') as vocab_file:
      cached = pickle.load(vocab_file)
      print('vocabulary loaded')
      return cached
  except IOError:
    print('building vocabulary')
  counts = build_word_frequency_distribution()
  # Most frequent first; the sort is stable, so ties keep their dict order.
  ranked = sorted(counts.items(), key=lambda item: -item[1])
  kept = ranked[:n - lower + 1]
  vocab = {word: word_id for word_id, (word, _) in enumerate(kept, lower)}
  with open(vocab_fn, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)
  return vocab


# Id substituted by make_data() for tokens that are not in the vocabulary.
UNKNOWN = 2


def make_data(split_points=(0.8, 0.94)):
  """Encodes every review and writes it to a random train/dev/test split.

  Each review is sentence-split with spaCy, its tokens are mapped to
  vocabulary ids (UNKNOWN when absent), and the (x, y) pair is pickled to one
  of the three split files, where y is the review's 'stars' value.

  Args:
    split_points: (train_ratio, dev_ratio). A uniform random draw r sends the
      review to train if r < train_ratio, to dev if r < dev_ratio, and to
      test otherwise.
  """
  train_ratio, dev_ratio = split_points
  vocab = build_vocabulary()

  # The context manager closes all three files on every exit path, including
  # exceptions other than KeyboardInterrupt.
  with open(trainset_fn, 'wb') as train_f, \
       open(devset_fn, 'wb') as dev_f, \
       open(testset_fn, 'wb') as test_f:
    try:
      for review in tqdm(read_reviews()):
        x = [[vocab.get(tok.orth_, UNKNOWN) for tok in sent]
             for sent in en(review['text']).sents]
        y = review['stars']

        r = random.random()
        if r < train_ratio:
          f = train_f
        elif r < dev_ratio:
          f = dev_f
        else:
          f = test_f
        pickle.dump((x, y), f)
    except KeyboardInterrupt:
      # Deliberate: Ctrl-C stops early but keeps what was written so far.
      pass


if __name__ == '__main__':
  make_data()


================================================
FILE: kaggle-classification/.gitignore
================================================
# Directories to save model checkpoints
runs/
model/*
saved_models/*

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Python virtual environment directory
env/

# Don't version control data: this is the directory where data is downloaded to.
local_data/

# Temporary directory for hacking stuff in
tmp/

# Comet API key
comet_api_key.txt


================================================
FILE: kaggle-classification/README.md
================================================
# Toxic Comment Classification Kaggle Challenge

This directory is a place to play around with solutions for the
[Toxic Comment Classification Kaggle challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge).
The challenge was created by the Jigsaw Conversation AI team in December 2017
and it ends in February 2018.

These models are meant to be simple baselines created independently from the
Google infrastructure.


## To Run Locally

1.  Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for
    the project (recommended, but technically optional).

    Python 2:

    ```
    python -m virtualenv env
    ```

    Python 3:

    ```
    python3 -m venv env
    ```

    In either case, enter your virtual env with:

    ```shell
    source env/bin/activate
    ```

2.  Install library dependencies:

    ```shell
    pip install -r requirements.txt
    ```

3.  For training locally, download the training (`train.csv`) and test
    (`test.csv`) data from the
    [Kaggle challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/data).

    If you have [a Kaggle API Key](https://github.com/Kaggle/kaggle-api#api-credentials)
    set up, you can use the [Kaggle api tool](https://github.com/Kaggle/kaggle-api)
    to download these files by running:

    ```shell
    kaggle competitions download -c jigsaw-toxic-comment-classification-challenge -p ./
    mv jigsaw-toxic-comment-classification-challenge local_data
    for z in local_data/*.zip; do unzip -x $z -d local_data/; done
    ```

    Note: the `kaggle` command is installed via `pip` and is listed in
    `requirements.txt`.

4.  Run a model on a given class (e.g. 'toxic' or 'obscene'). There are examples
    of how to run the model locally and using ml-engine in `bin/run_local` and
    `bin/run` respectively.

    Note: to run in google cloud, you will need to be authenticated with
    Google Cloud (you can run `gcloud auth application-default login` to do
    this) and you must have access to the cloud bucket where the data is located
    (you can test this by running `gcloud storage ls  gs://kaggle-model-experiments/`).


## Available Models
  * `bag_of_words` - bag of words model with a learned word-embedding layer
  * `cnn` - a 2 layer ConvNet


## Data

Copies of the training and test data are available in Google Storage from the
wikidetox project.

* train.csv: gs://kaggle-model-experiments/train.csv
* test.csv: gs://kaggle-model-experiments/test.csv


================================================
FILE: kaggle-classification/__init__.py
================================================


================================================
FILE: kaggle-classification/bin/cancel-job
================================================
#!/bin/bash

# Cancels the ml-engine job whose id is given as the first argument.
# Usage: cancel-job JOB_ID
if [[ -z "$1" ]]; then
  echo "Usage: $(basename "$0") JOB_ID" >&2
  exit 1
fi

gcloud ml-engine jobs cancel "$1"


================================================
FILE: kaggle-classification/bin/ls-jobs
================================================
#!/bin/bash

# Lists the ml-engine jobs whose listing line contains today's date
# (formatted YYYY-MM-DD).
TODAY=$(date '+%Y-%m-%d')

gcloud ml-engine jobs list | grep "${TODAY}"


================================================
FILE: kaggle-classification/bin/run
================================================
#!/bin/bash

#
# A script to train the kaggle model remotely using ml-engine.
#
# To run with default hyperparameters from the kaggle-classification directory just enter:
# './bin/run'
#
# To run with hyperparameter tuning, enter:
# './bin/run -c hparam_config.yaml'
#
#
# Setup Steps:
# 1. Install the gcloud SDK
# 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]`
# 3. Put the train and test data in Cloud Storage, `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/`
#

# Edit these!
BUCKET_NAME=kaggle-model-experiments
CONFIG=gpu_config.yaml
JOB_NAME=${USER}_kaggle_training
# Note: this must be compatible with cells that have GPUs. us-central1 works.
# See: https://cloud.google.com/ml-engine/docs/using-gpus
REGION=us-central1
DATE=$(date '+%Y%m%d_%H%M%S')
OUTPUT_PATH=gs://${BUCKET_NAME}/models/${USER}/${DATE}

# Option parsing. The leading ':' puts getopts in silent mode, so a missing
# argument is reported through the ':' case with the flag letter in OPTARG.
while getopts :c:h opt; do
case ${opt} in
h)
    echo "Usage: run [-c config_filename.yaml]"
    echo "Flags: "
    echo -e " -c Specify a config file (e.g. use hparam_config to enable hyperparameter tuning)"
    exit 0;;
c)
    echo "Using custom config ${OPTARG}"
    CONFIG=${OPTARG};;
:)
    echo "Error: -${OPTARG} requires an argument."
    echo "Use 'run -h' for help."
    exit 1;;
\?)
    echo "Invalid flag. Use 'run -h' for help."
    exit 1;;
esac
done


echo "Writing to $OUTPUT_PATH"

# Remote. Expansions are quoted so a config path or user name containing
# spaces is passed as a single argument.
gcloud ml-engine jobs submit training "${JOB_NAME}_${DATE}" \
    --job-dir "${OUTPUT_PATH}" \
    --runtime-version 1.4 \
    --config "${CONFIG}" \
    --module-name trainer.model \
    --package-path trainer/ \
    --region "${REGION}" \
    --verbosity debug \
    -- \
    --train_data "gs://${BUCKET_NAME}/train.csv" \
    --y_class toxic \
    --train_steps 5000 \
    --saved_model_dir "gs://${BUCKET_NAME}/saved_graph/${USER}/${DATE}" \
    --model cnn


echo "You can view the tensorboard for this job with the command:"
echo ""
echo -e "\t tensorboard --logdir=${OUTPUT_PATH}"
echo ""
echo "And on your browser navigate to:"
echo ""
echo -e "\t http://localhost:6006/#scalars"
echo ""
echo "This will populate after a model checkpoint is saved."
echo ""


================================================
FILE: kaggle-classification/bin/run_keras.sh
================================================
#!/bin/bash

#
# A script to train the kaggle model remotely using ml-engine.
#
# Setup Steps:
# 1. Install the gcloud SDK
# 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]`
# 3. Put the train and test data in Cloud Storage,
#      `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/resources`
#

# Edit these!
BUCKET_NAME=kaggle-model-experiments
JOB_NAME=${USER}_kaggle_training
REGION=us-east1

INPUT_PATH=gs://${BUCKET_NAME}/resources
DATE=$(date '+%Y%m%d_%H%M%S')
OUTPUT_PATH=gs://${BUCKET_NAME}/keras_runs/${USER}/${DATE}
LOG_PATH=${OUTPUT_PATH}/logs/
HPARAM_CONFIG=keras_hparam_config.yaml
COMET_KEY_FILE='comet_api_key.txt'
COMET_PROJECT_NAME='compare-models'

# comet.ml logging is optional in the trainer, so only pass the comet flags
# when the key file is actually present instead of failing on `cat`.
COMET_ARGS=()
if [[ -r "${COMET_KEY_FILE}" ]]; then
  COMET_ARGS=(
      --comet_key="$(cat "${COMET_KEY_FILE}")"
      --comet_project_name="${COMET_PROJECT_NAME}"
  )
else
  echo "No ${COMET_KEY_FILE} found; comet.ml logging is disabled." >&2
fi

echo "Writing to $OUTPUT_PATH"

# Remote
gcloud ml-engine jobs submit training "${JOB_NAME}_${DATE}" \
    --job-dir="${OUTPUT_PATH}" \
    --runtime-version=1.8 \
    --module-name=keras_trainer.model \
    --package-path=keras_trainer \
    --region="${REGION}" \
    --verbosity=debug \
    --config="${HPARAM_CONFIG}" \
    -- \
    --train_path="${INPUT_PATH}/train.csv" \
    --test_path="${INPUT_PATH}/validation.csv" \
    --embeddings_path="${INPUT_PATH}/glove.6B/glove.6B.300d.txt" \
    --log_path="${LOG_PATH}" \
    "${COMET_ARGS[@]}" \
    --model_type=single_layer_cnn

echo "You can view the tensorboard for this job with the command:"
echo ""
echo -e "\t tensorboard --logdir=${LOG_PATH}"
echo ""
echo "And on your browser navigate to:"
echo ""
echo -e "\t http://localhost:6006/#scalars"
echo ""
echo "This will populate after a model checkpoint is saved."
echo ""


================================================
FILE: kaggle-classification/bin/run_keras_local.sh
================================================
#!/bin/bash

#
# Trains a keras_trainer model locally on the files in local_data/.
#

DATE=$(date '+%Y%m%d_%H%M%S')
OUTPUT_PATH=runs/${DATE}
INPUT_PATH=local_data
LOG_PATH=${OUTPUT_PATH}/logs/
COMET_KEY_FILE='comet_api_key.txt'
COMET_PROJECT_NAME='compare-models'

# comet.ml logging is optional in the trainer, so only pass the comet flags
# when the key file is actually present instead of failing on `cat`.
COMET_ARGS=()
if [[ -r "${COMET_KEY_FILE}" ]]; then
  COMET_ARGS=(
      --comet_key="$(cat "${COMET_KEY_FILE}")"
      --comet_project_name="${COMET_PROJECT_NAME}"
  )
else
  echo "No ${COMET_KEY_FILE} found; comet.ml logging is disabled." >&2
fi

echo "You can view the tensorboard for this job with the command:"
echo ""
echo -e "\t tensorboard --logdir=${LOG_PATH}"
echo ""
echo "And on your browser navigate to:"
echo ""
echo -e "\t http://localhost:6006/#scalars"
echo ""
echo "This will populate after a model checkpoint is saved."
echo ""

# The trainer's embedding_dim hyper-parameter is fixed at 300 and has no
# command-line override, so the 300-dimensional GloVe vectors must be used;
# the 100-dimensional file cannot be copied into the embedding matrix.
python -m keras_trainer.model \
       --train_path="${INPUT_PATH}/train.csv" \
       --test_path="${INPUT_PATH}/validation.csv" \
       --embeddings_path="${INPUT_PATH}/glove.6B/glove.6B.300d.txt" \
       --job-dir="${OUTPUT_PATH}" \
       --log_path="${LOG_PATH}" \
       "${COMET_ARGS[@]}" \
       --model_type=rnn


================================================
FILE: kaggle-classification/bin/run_local
================================================
#!/bin/bash

#
# Trains the kaggle model on this machine via `gcloud ml-engine local train`.
# Expects train.csv and test.csv to have been downloaded into local_data/.
# Each run writes to its own timestamped directory under model/.
#
TIMESTAMP=$(date '+%Y%m%d_%H%M%S')

gcloud ml-engine local train \
     --module-name=trainer.model \
     --package-path=trainer \
     --job-dir="model/${TIMESTAMP}" -- \
     --train_data=local_data/train.csv \
     --y_class=toxic \
     --train_steps=100


================================================
FILE: kaggle-classification/bin/stream-logs
================================================
#!/bin/bash

# Streams the logs of the ml-engine job whose id is given as the first
# argument.
# Usage: stream-logs JOB_ID
if [[ -z "$1" ]]; then
  echo "Usage: $(basename "$0") JOB_ID" >&2
  exit 1
fi

gcloud ml-engine jobs stream-logs "$1"


================================================
FILE: kaggle-classification/config.yaml
================================================
trainingInput:
  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.
  scaleTier: BASIC_GPU
  ## Custom scaleTier needed for using > 1 GPU machines.
  # scaleTier: CUSTOM
  # masterType: complex_model_m_gpu
  # workerType: complex_model_m_gpu
  # parameterServerType: large_model
  # workerCount: 9
  # parameterServerCount: 3


================================================
FILE: kaggle-classification/gpu_config.yaml
================================================
trainingInput:
  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.
  scaleTier: BASIC_GPU
  ## Custom scaleTier needed for using > 1 GPU machines.
  # scaleTier: CUSTOM
  # masterType: complex_model_m_gpu
  # workerType: complex_model_m_gpu
  # parameterServerType: large_model
  # workerCount: 9
  # parameterServerCount: 3


================================================
FILE: kaggle-classification/hparam_config.yaml
================================================
trainingInput:
  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.
  scaleTier: BASIC_GPU
  ## Custom scaleTier needed for using > 1 GPU machines.
  # scaleTier: CUSTOM
  # masterType: complex_model_m_gpu
  # workerType: complex_model_m_gpu
  # parameterServerType: large_model
  # workerCount: 9
  # parameterServerCount: 3
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc
    maxTrials: 100
    maxParallelTrials: 10
    enableTrialEarlyStopping: TRUE
    params:
      - parameterName: embedding_size
        type: INTEGER
        minValue: 50
        maxValue: 200
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: num_filters
        type: INTEGER
        minValue: 10
        maxValue: 200
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: dropout_keep_prob
        type: DOUBLE
        minValue: 0.5
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.1
        scaleType: UNIT_LOG_SCALE


================================================
FILE: kaggle-classification/keras_hparam_config.yaml
================================================
trainingInput:
  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.
  pythonVersion: '3.5'
  scaleTier: BASIC_GPU
  ## Custom scaleTier needed for using > 1 GPU machines.
  # scaleTier: CUSTOM
  # masterType: complex_model_m_gpu
  # workerType: complex_model_m_gpu
  # parameterServerType: large_model
  # workerCount: 9
  # parameterServerCount: 3
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: val_auc_roc
    maxTrials: 20
    maxParallelTrials: 3
    enableTrialEarlyStopping: TRUE
    params:
      - parameterName: learning_rate 
        type: DOUBLE
        minValue: 0.00005
        maxValue: 0.1
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: DISCRETE
        discreteValues:
        - 16
        - 32
        - 64
        - 128
        - 256


================================================
FILE: kaggle-classification/keras_trainer/__init__.py
================================================


================================================
FILE: kaggle-classification/keras_trainer/base_model.py
================================================
"""Base model class used by the ModelRunner"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from abc import ABCMeta, abstractmethod
from keras.layers import Input
from keras.models import Model


class BaseModel(metaclass=ABCMeta):
  """Base class for model runner

  Abstract interface: a concrete model class must implement get_model().
  """

  @abstractmethod
  def get_model(self) -> Model:
    """Returns the keras Model for this architecture; must be overridden."""
    raise NotImplementedError('Method get_model needs to be implemented.')


================================================
FILE: kaggle-classification/keras_trainer/cnn_with_attention.py
================================================
"""Model class for a single layer CNN"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from keras.layers import Conv1D
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import AveragePooling1D
from keras.layers import Activation
from keras.layers import Concatenate
from keras.layers import Multiply
from keras.models import Model
from keras.layers import Permute
from keras_trainer import base_model
from keras.layers import Activation
from keras_trainer.custom_metrics import auc_roc


class CNNWithAttention(base_model.BaseModel):
  """Parallel 1-D convolutions, each pooled with learned attention weights.

  hparams:
    embedding_dim
    vocab_size
    sequence_length
    dropout_rate
    train_embedding
    filter_sizes: kernel width of each convolution branch
    num_filters: number of filters of each branch (same length as
      filter_sizes)
    attention_intermediate_size: width of the hidden attention layer
    learning_rate: RMSprop learning rate (optional; Keras default if absent)
  """

  def __init__(self, embeddings_matrix, hparams, labels):
    """Stores the configuration; the network is built by get_model().

    Args:
      embeddings_matrix: Initial weights of the embedding layer.
      hparams: Hyper-parameter object with the attributes listed on the class.
      labels: Sequence of label names; its length is the number of outputs.
    """
    self.embeddings_matrix = embeddings_matrix
    self.hparams = hparams
    self.labels = labels
    self.num_labels = len(labels)

  def get_model(self):
    """Builds and compiles the network.

    Returns:
      A compiled keras Model mapping a padded token-id sequence to one sigmoid
      output per label.
    """
    # Local import: only needed to pass an explicit learning rate.
    from keras.optimizers import RMSprop

    I = Input(shape=(self.hparams.sequence_length,), dtype='float32')
    E = Embedding(
        self.hparams.vocab_size,
        self.hparams.embedding_dim,
        weights=[self.embeddings_matrix],
        input_length=self.hparams.sequence_length,
        trainable=self.hparams.train_embedding)(
            I)
    P = []
    for i, size in enumerate(self.hparams.filter_sizes):
      conv = Conv1D(
          self.hparams.num_filters[i],
          size,
          activation='relu',
          padding='same')(E)
      # One scalar attention score per time step.
      attention = Dense(
          self.hparams.attention_intermediate_size, activation='relu')(conv)
      attention = Dense(1, use_bias=False)(attention)
      # Permute trick to apply softmax to second to last layer.
      attention = Permute((2, 1))(attention)
      attention = Activation('softmax')(attention)
      attention = Permute((2, 1))(attention)
      weighted = Multiply()([attention, conv])
      # Pooling over the whole sequence collapses the time axis to length 1.
      P.append(
          AveragePooling1D(self.hparams.sequence_length,
                           padding='same')(weighted))
    X = Concatenate(axis=-1)(P)
    X = Flatten()(X)
    X = Dropout(self.hparams.dropout_rate)(X)
    X = Dense(128, activation='relu')(X)
    X = Dropout(self.hparams.dropout_rate)(X)
    Output = Dense(self.num_labels, activation='sigmoid', name='outputs')(X)

    # Honour the learning_rate hyper-parameter; passing the string 'rmsprop'
    # always uses the Keras default rate and makes the flag a no-op.
    learning_rate = getattr(self.hparams, 'learning_rate', None)
    if learning_rate is None:
      optimizer = 'rmsprop'
    else:
      optimizer = RMSprop(lr=learning_rate)

    model = Model(inputs=I, outputs=Output)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy', auc_roc])
    # summary() prints by itself and returns None, so it is not wrapped in
    # print().
    model.summary()
    return model


================================================
FILE: kaggle-classification/keras_trainer/custom_metrics.py
================================================
"""Custom metrics used by Keras models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def auc_roc(y_true, y_pred):
  """Streaming ROC AUC usable as a Keras metric.

  Args:
    y_true: Tensor of labels; values above 0.5 count as positive.
    y_pred: Tensor of predicted scores.

  Returns:
    A tensor holding the AUC, evaluated after the metric's update op has run.
  """
  # any tensorflow metric
  y_true = tf.to_int32(tf.greater(y_true, 0.5))
  value, update_op = tf.metrics.auc(y_true, y_pred)

  # find all variables created for this metric: those whose second name-scope
  # component mentions 'auc_roc'. Names without a second component are skipped
  # instead of raising IndexError.
  metric_vars = []
  for var in tf.local_variables():
    scopes = var.name.split('/')
    if len(scopes) > 1 and 'auc_roc' in scopes[1]:
      metric_vars.append(var)

  # Add metric variables to GLOBAL_VARIABLES collection.
  # They will be initialized for new session.
  for v in metric_vars:
    tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

  # force update metric values
  with tf.control_dependencies([update_op]):
    value = tf.identity(value)
  return value


================================================
FILE: kaggle-classification/keras_trainer/model.py
================================================
"""Classifiers for the Toxic Comment Classification Kaggle challenge, https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge

To run locally:
  python keras-trainer/model.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import json
import numpy as np
import pandas as pd
import os
import os.path
from comet_ml import Experiment
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from os.path import expanduser
from sklearn import metrics
from tensorflow.python.framework.errors_impl import NotFoundError
from keras_trainer.cnn_with_attention import CNNWithAttention
from keras_trainer.single_layer_cnn import SingleLayerCnn
from keras_trainer.rnn import RNNModel
from keras_trainer.custom_metrics import auc_roc
from keras_trainer.base_model import BaseModel
from typing import Dict, Type

# Parsed command-line arguments; assigned when the module is run as a script.
FLAGS = None

# Local scratch file: the model is checkpointed here during training and
# copied to/from <job_dir>/model.h5 with tf.gfile.
TEMPORARY_MODEL_PATH = 'model.h5'

# Maps the model_type hyper-parameter to the class that builds the model.
VALID_MODELS = {
    'cnn_with_attention': CNNWithAttention,
    'single_layer_cnn': SingleLayerCnn,
    'rnn': RNNModel
}  # type: Dict[str, Type[BaseModel]]

# Default hyper-parameters. When run as a script, learning_rate, dropout_rate,
# batch_size and model_type are overwritten from the command-line flags; the
# remaining values have no flag.
DEFAULT_HPARAMS = tf.contrib.training.HParams(
    learning_rate=0.00005,
    dropout_rate=0.5,
    batch_size=128,
    epochs=1,
    sequence_length=250,
    embedding_dim=300,
    train_embedding=False,
    model_type='single_layer_cnn',
    filter_sizes=[3, 4, 5],
    num_filters=[128, 128, 128],
    attention_intermediate_size=128)


class ModelRunner():
  """Trains, stores, loads and scores one of the VALID_MODELS classifiers.

  The tokenizer vocabulary and the embedding matrix are both derived from the
  embeddings file. The trained model lives at <job_dir>/model.h5; because h5
  files are written and read through the local scratch path
  TEMPORARY_MODEL_PATH, that path must not exist when a runner is created.
  """

  def __init__(self, job_dir, embeddings_path, log_path, hparams, labels):
    """Prepares tokenizer and embeddings and loads a saved model if present.

    Args:
      job_dir: Directory (local or gs://) holding model.h5.
      embeddings_path: Text file with one "<word> <v1> <v2> ..." line per word.
      log_path: Directory for TensorBoard logs.
      hparams: Hyper-parameter object; `vocab_size` is set on it here.
      labels: Comma separated label column names.

    Raises:
      FileExistsError: If TEMPORARY_MODEL_PATH already exists.
    """
    if os.path.exists(TEMPORARY_MODEL_PATH):
      raise FileExistsError('The following file path already exists: {}'.format(
          TEMPORARY_MODEL_PATH))

    self.job_dir = job_dir
    self.model_path = os.path.join(job_dir, 'model.h5')
    self.embeddings_path = embeddings_path
    self.log_path = log_path
    self.hparams = hparams
    self.labels = [l.strip() for l in labels.split(',')]
    # Stays None until a saved model is found or train() has run.
    self.model = None
    print('Setting up tokenizer...')
    self.tokenizer = self._setup_tokenizer()
    print('Setting up embedding matrix...')
    self.embeddings_matrix = self._setup_embeddings_matrix()
    print('Loading model...')
    self._load_model()

  def train(self, train):
    """Fits a new model of type hparams.model_type and stores it in job_dir.

    Args:
      train: DataFrame with a 'comment_text' column and one column per label.

    Raises:
      ValueError: If hparams.model_type is not a key of VALID_MODELS.
    """
    if self.hparams.model_type in VALID_MODELS:
      model = VALID_MODELS[self.hparams.model_type](
          self.embeddings_matrix, self.hparams,
          self.labels).get_model()  # a compiled keras Model
    else:
      raise ValueError('You have specified an invalid model type.')

    train_comment = self._prep_texts(train['comment_text'])
    # One row per example, one column per label.
    train_labels = np.array(list(zip(*[train[label] for label in self.labels])))

    callbacks = [
        ModelCheckpoint(
            TEMPORARY_MODEL_PATH, save_best_only=True, verbose=True),
        EarlyStopping(monitor='val_loss', mode='auto'),
        TensorBoard(self.log_path)
    ]

    model.fit(
        x=train_comment,
        y=train_labels,
        batch_size=int(self.hparams.batch_size),
        epochs=self.hparams.epochs,
        validation_split=0.1,
        callbacks=callbacks,
        verbose=2)  # Output one line per epoch

    # Necessary because we can't save h5 files to cloud storage directly via
    # Checkpoint.
    tf.gfile.MakeDirs(self.job_dir)
    tf.gfile.Copy(TEMPORARY_MODEL_PATH, self.model_path, overwrite=True)
    tf.gfile.Remove(TEMPORARY_MODEL_PATH)
    print('Saved model to {}'.format(self.model_path))

    # Reload so self.model is the best checkpoint rather than the last epoch.
    self._load_model()

  def predict(self, texts):
    """Returns the loaded model's predictions for an iterable of strings."""
    data = self._prep_texts(texts)
    return self.model.predict(data)

  def score_metric(self, data, metric_name, metric_fn):
    """Prints per-label metric scores and returns their mean.

    Args:
      data: Dataset containing 'comment_text' column, that will be used to get
        predictions, as well as label columns to compare the predictions
        against.
      metric_name (str): String to use when printing.
      metric_fn: function that takes labels and predictions and outputs a score

    Returns:
      The mean of the per-label scores as a float (the single score when there
      is only one label).
    """

    predictions = self.predict(data['comment_text'])
    agg = {}
    for label_idx, label in enumerate(self.labels):
      # Binarize the (possibly fractional) ground truth at 0.5.
      labels = list((data[label] > 0.5).astype(int))
      preds = predictions[:, label_idx]  # label and pred indicies better match
      score = metric_fn(labels, preds)
      agg[label] = score
    mean_score = float(np.mean(list(agg.values())))
    print('{}: {}'.format(metric_name, agg))
    if len(agg) > 1:
      print('Mean {}: {}'.format(metric_name, np.mean(list(agg.values()))))
    return mean_score

  def score_auc(self, data):
    """Prints per-label ROC AUC and returns the mean over labels."""
    return self.score_metric(
        data, 'ROC AUC', lambda l, p: metrics.roc_auc_score(l, p, average=None))

  def score_precision(self, data):
    """Prints per-label precision at threshold 0.5 and returns the mean."""
    return self.score_metric(
        data, 'Precision', lambda l, p: metrics.precision_score(
            l, (p > 0.5).astype(int)))

  def score_recall(self, data):
    """Prints per-label recall at threshold 0.5 and returns the mean."""
    return self.score_metric(
        data,
        'Recall', lambda l, p: metrics.recall_score(l, (p > 0.5).astype(int)))

  def _prep_texts(self, texts):
    """Tokenizes texts and pads/truncates them to hparams.sequence_length."""
    return pad_sequences(
        self.tokenizer.texts_to_sequences(texts),
        maxlen=self.hparams.sequence_length)

  def _load_model(self):
    """Loads <job_dir>/model.h5 into self.model if that file exists."""
    try:
      tf.gfile.Copy(self.model_path, TEMPORARY_MODEL_PATH, overwrite=True)
    except NotFoundError:
      print('Could not load model at: {}'.format(self.model_path))
      return
    try:
      self.model = load_model(
          TEMPORARY_MODEL_PATH, custom_objects={'auc_roc': auc_roc})
    finally:
      # Always clear the scratch copy; a leftover file would make the next
      # ModelRunner() raise FileExistsError.
      tf.gfile.Remove(TEMPORARY_MODEL_PATH)
    print('Model loaded from: {}'.format(self.model_path))

  def _setup_tokenizer(self):
    """Builds a tokenizer over the embedding file's words; sets vocab_size."""
    words = []
    with tf.gfile.Open(self.embeddings_path, 'r') as f:
      for line in f:
        words.append(line.split()[0])
    tokenizer = Tokenizer(lower=True, oov_token='<unk>')
    tokenizer.fit_on_texts(words)
    # +1 because index 0 is never assigned to a word.
    self.hparams.vocab_size = len(tokenizer.word_index) + 1
    return tokenizer

  def _setup_embeddings_matrix(self):
    """Builds the (vocab_size, embedding_dim) matrix from the embedding file.

    Rows of words missing from the file stay zero; the last row is set to the
    mean of all rows.

    Raises:
      ValueError: If a vector's length differs from hparams.embedding_dim.
    """
    embedding_dim = self.hparams.embedding_dim
    embeddings_matrix = np.zeros((self.hparams.vocab_size, embedding_dim))
    with tf.gfile.Open(self.embeddings_path, 'r') as f:
      for line in f:
        values = line.split()
        word = values[0]
        if word in self.tokenizer.word_index:
          word_idx = self.tokenizer.word_index[word]
          word_embedding = np.asarray(values[1:], dtype='float32')
          if word_embedding.shape[0] != embedding_dim:
            # Fail with an actionable message instead of numpy's broadcast
            # error (also a ValueError).
            raise ValueError(
                'Embedding for {!r} in {} has {} dimensions but '
                'hparams.embedding_dim is {}.'.format(
                    word, self.embeddings_path, word_embedding.shape[0],
                    embedding_dim))
          embeddings_matrix[word_idx] = word_embedding
    embeddings_matrix[self.hparams.vocab_size - 1] = embeddings_matrix.mean(
        axis=0)
    return embeddings_matrix


if __name__ == '__main__':
  # Script entry point: parse flags, train a model, score it on the test set.

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--train_path',
      type=str,
      default='local_data/train.csv',
      help='Path to the training data.')
  parser.add_argument(
      '--test_path',
      type=str,
      default='local_data/validation.csv',
      help='Path to the test data.')
  # The default must match DEFAULT_HPARAMS.embedding_dim (300), which has no
  # command-line override; lower-dimensional vectors cannot be copied into the
  # embedding matrix.
  parser.add_argument(
      '--embeddings_path',
      type=str,
      default='local_data/glove.6B/glove.6B.300d.txt',
      help='Path to the embeddings.')
  parser.add_argument(
      '--job-dir', type=str, default='local_data/', help='Path to model file.')
  parser.add_argument(
      '--log_path',
      type=str,
      default='local_data/logs/',
      help='Path to write tensorboard logs.')
  parser.add_argument(
      '--comet_key',
      type=str,
      default=None,
      help='comet.ml API key. Leave unset (or empty) to disable comet.ml.'
  )
  parser.add_argument(
      '--comet_project_name',
      type=str,
      default=None,
      help='Name of comet project that tracks results. Must be set if comet_key is.'
  )
  parser.add_argument(
      '--labels',
      default='toxic,severe_toxic,obscene,threat,insult,identity_hate',
      help='A comma separated list of labels to predict.')
  parser.add_argument(
      '--model_type',
      default='single_layer_cnn',
      help='Model type. Valid choices are {}'.format(list(VALID_MODELS.keys())))

  # Hyper-parameters
  parser.add_argument(
      '--learning_rate', type=float, default=0.00005, help='Learning rate.')
  parser.add_argument(
      '--dropout_rate', type=float, default=0.5, help='Dropout rate.')
  parser.add_argument('--batch_size', type=int, default=64, help='Batch size.')

  FLAGS = parser.parse_args()

  # Flags override the corresponding defaults; other hparams keep their
  # default values.
  hparams = DEFAULT_HPARAMS
  hparams.learning_rate = FLAGS.learning_rate
  hparams.dropout_rate = FLAGS.dropout_rate
  hparams.batch_size = FLAGS.batch_size
  hparams.model_type = FLAGS.model_type

  if FLAGS.comet_key:
    experiment = Experiment(
        api_key=FLAGS.comet_key,
        project_name=FLAGS.comet_project_name,
        team_name='jigsaw',
        auto_param_logging=False,
        parse_args=False)
    experiment.log_multiple_params(hparams.values())
    experiment.log_parameter('train_data_path', FLAGS.train_path)
    experiment.log_parameter('test_data_path', FLAGS.test_path)
    experiment.log_parameter('embeddings_path', FLAGS.embeddings_path)
    experiment.log_parameter('model_path', FLAGS.job_dir)
    experiment.log_parameter('model', hparams.model_type)

  # Used to scope logs to a given trial (when hyper param tuning) so that they
  # don't run over each other. When running locally it will just use the passed
  # in log path.
  trial_log_path = os.path.join(
      FLAGS.log_path,
      json.loads(os.environ.get('TF_CONFIG', '{}')).get('task', {}).get(
          'trial', ''))

  model = ModelRunner(
      job_dir=FLAGS.job_dir,
      embeddings_path=FLAGS.embeddings_path,
      log_path=trial_log_path,
      hparams=hparams,
      labels=FLAGS.labels)
  with tf.gfile.Open(FLAGS.train_path, 'rb') as f:
    train = pd.read_csv(f, encoding='utf-8')
  if FLAGS.comet_key:
    experiment.log_dataset_hash(train)
  model.train(train)

  with tf.gfile.Open(FLAGS.test_path, 'rb') as f:
    test_data = pd.read_csv(f, encoding='utf-8')
  # Always print the test AUC, not only when comet is enabled, and only log a
  # value to comet when score_auc actually returns one.
  test_auc = model.score_auc(test_data)
  if FLAGS.comet_key and test_auc is not None:
    experiment.log_metric('test_auc', test_auc)

  # Smoke test: the reloaded model can serve a prediction.
  model.predict(['This sentence is benign'])


================================================
FILE: kaggle-classification/keras_trainer/rnn.py
================================================
"""RNN"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from keras.layers import Input, GRU, Dense, Embedding, Dropout, Bidirectional, TimeDistributed, Flatten, Dot
from keras.models import Model
from keras_trainer import base_model
from keras_trainer.custom_metrics import auc_roc


class RNNModel(base_model.BaseModel):
  """Bidirectional GRU classifier with attention pooling over time steps.

  hparams:
    embedding_dim
    vocab_size
    sequence_length
    dropout_rate
    train_embedding
  """

  def __init__(self, embeddings_matrix, hparams, labels):
    """Stores the configuration used later by get_model().

    Args:
      embeddings_matrix: initial weights handed to the Embedding layer.
      hparams: hyperparameter object; the fields read are listed in the class
        docstring.
      labels: sequence of label names; its length sets the output width.
    """
    self.embeddings_matrix = embeddings_matrix
    self.hparams = hparams
    self.labels = labels
    self.num_labels = len(labels)

  def get_model(self):
    """Builds and compiles the Keras model.

    Returns:
      A compiled keras Model mapping a (sequence_length,) input to
      num_labels sigmoid outputs.
    """
    # Imported locally so this block does not depend on the file-level import
    # list being edited.
    from keras.layers import Softmax

    sequence_length = self.hparams.sequence_length

    inputs = Input(shape=(sequence_length,), dtype='float32')
    embedded = Embedding(
        self.hparams.vocab_size,
        self.hparams.embedding_dim,
        weights=[self.embeddings_matrix],
        input_length=sequence_length,
        trainable=self.hparams.train_embedding)(
            inputs)
    # (batch, sequence_length, 256): 128 units for each GRU direction.
    hidden = Bidirectional(GRU(128, return_sequences=True))(embedded)

    # Attention scores. The previous version applied the scoring layer to the
    # GRU output instead of to the Dense(128) projection (leaving that
    # projection unused) and took a softmax over a single unit, which is
    # always 1.0; the result was an unweighted sum over time steps.
    attention = TimeDistributed(
        Dense(128, activation='relu'), input_shape=(sequence_length, 256))(
            hidden)
    attention = TimeDistributed(Dense(1))(attention)
    # Normalise the scores across time steps (axis 1), not across the size-1
    # feature axis.
    attention = Softmax(axis=1)(attention)

    # Weighted sum of the GRU states over time: (batch, 256, 1) -> (batch, 256).
    pooled = Dot((1, 1))([hidden, attention])
    pooled = Flatten()(pooled)
    pooled = Dense(128, activation='relu')(pooled)
    pooled = Dropout(self.hparams.dropout_rate)(pooled)
    outputs = Dense(self.num_labels, activation='sigmoid')(pooled)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy', auc_roc])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    return model


================================================
FILE: kaggle-classification/keras_trainer/single_layer_cnn.py
================================================
"""Model class for a single layer CNN"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from keras.layers import Conv1D
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import MaxPooling1D
from keras.layers import Activation
from keras.layers import Concatenate
from keras.optimizers import Adam
from keras.models import Model
from keras_trainer import base_model
from keras_trainer.custom_metrics import auc_roc


class SingleLayerCnn(base_model.BaseModel):
  """Single Layer Based CNN

  Three parallel Conv1D branches (kernel sizes 5, 4 and 3) are max-pooled over
  the whole sequence, concatenated and fed to a dense classifier.

  hparams:
    embedding_dim
    vocab_size
    sequence_length
    dropout_rate
    train_embedding
    learning_rate
  """

  def __init__(self, embeddings_matrix, hparams, labels):
    """Stores the configuration used later by get_model().

    Args:
      embeddings_matrix: initial weights handed to the Embedding layer.
      hparams: hyperparameter object; the fields read are listed in the class
        docstring.
      labels: sequence of label names; its length sets the output width.
    """
    self.embeddings_matrix = embeddings_matrix
    self.hparams = hparams
    self.labels = labels
    self.num_labels = len(labels)

  def get_model(self) -> Model:
    """Builds and compiles the Keras model.

    Returns:
      A compiled keras Model mapping a (sequence_length,) input to
      num_labels sigmoid outputs (output layer named 'outputs').
    """
    sequence_length = self.hparams.sequence_length

    inputs = Input(shape=(sequence_length,), dtype='float32')
    embedded = Embedding(
        self.hparams.vocab_size,
        self.hparams.embedding_dim,
        weights=[self.embeddings_matrix],
        input_length=sequence_length,
        trainable=self.hparams.train_embedding)(
            inputs)

    # One conv + global-max-pool branch per kernel size. The order (5, 4, 3)
    # matches the original layer creation and concatenation order.
    branches = []
    for kernel_size in (5, 4, 3):
      branch = Conv1D(
          128, kernel_size, activation='relu', padding='same')(
              embedded)
      branch = MaxPooling1D(sequence_length, padding='same')(branch)
      branches.append(branch)

    features = Concatenate(axis=-1)(branches)
    features = Flatten()(features)
    features = Dropout(self.hparams.dropout_rate)(features)
    features = Dense(128, activation='relu')(features)
    features = Dropout(self.hparams.dropout_rate)(features)
    outputs = Dense(
        self.num_labels, activation='sigmoid', name='outputs')(
            features)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        # `lr` is a deprecated alias in the Keras release pinned by this
        # package; `learning_rate` is the supported argument name.
        optimizer=Adam(learning_rate=self.hparams.learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', auc_roc])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    return model


================================================
FILE: kaggle-classification/requirements.txt
================================================
absl-py==0.1.9
astor==0.6.2
bleach==3.3.0
certifi==2024.7.4
chardet==3.0.4
comet-ml==1.0.8
enum34==1.1.6
futures==3.1.1
gast==0.2.0
grpcio==1.53.2
h5py==2.7.1
html5lib==0.999999999
idna==3.7
kaggle==1.0.5
Keras==2.13.1
Markdown==2.6.11
mypy==0.600
nltk==3.9
numpy==1.22.0
pandas==0.22.0
Pillow==10.3.0
protobuf==3.18.3
python-dateutil==2.6.1
pytz==2017.3
PyYAML==5.4
requests==2.32.2
scikit-learn==1.5.0
scipy==1.10.0
six==1.11.0
sklearn==0.0
tensorboard==1.8.0
tensorflow==2.12.1
tensorflow-tensorboard==1.5.1
termcolor==1.1.0
tflearn==0.3.2
typed-ast==1.1.0
urllib3==1.26.18
websocket-client==0.47.0
Werkzeug==3.0.3
wurlitzer==1.0.2


================================================
FILE: kaggle-classification/setup.py
================================================
from setuptools import find_packages
from setuptools import setup

# Dependencies shared by every distribution declared below.
REQUIRED_PACKAGES = [
    'tflearn>=0.3.2',
    'Keras==2.13.1',
    'h5py==2.7.1',
    'comet-ml==1.0.8',
    'nltk>=3.3',
]

# setup() is invoked once per distribution name, in this order; every other
# argument is the same for all three invocations.
for _distribution_name in ('trainer', 'keras_trainer', 'tf_trainer'):
  setup(
      name=_distribution_name,
      version='0.1',
      install_requires=REQUIRED_PACKAGES,
      packages=find_packages(),
      include_package_data=True,
      description='tflearn.')


================================================
FILE: kaggle-classification/trainer/__init__.py
================================================


================================================
FILE: kaggle-classification/trainer/model.py
================================================
"""Classifiers for the Toxic Comment Classification Kaggle challenge, https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge

To run locally (part of --train_data is held out for evaluation, so no
separate test file is passed):
  python trainer/model.py --train_data=train.csv --y_class=toxic

To run locally using Cloud ML Engine:
  gcloud ml-engine local train \
        --module-name=trainer.model \
        --package-path=trainer \
        --job-dir=model -- \
        --train_data=train.csv \
        --y_class=toxic \
        --train_steps=100

To run TensorBoard locally:
  tensorboard --logdir=model/

Then visit http://localhost:6006/ to see the dashboard.
"""

from __future__ import print_function
from __future__ import division

import argparse
import os
import sys
import pandas as pd
import tensorflow as tf
from sklearn import metrics
from trainer import wikidata
from collections import namedtuple

from tensorflow.contrib.training.python.training import hparam

# Parsed command-line arguments. Assigned in the `__main__` section below; the
# model functions read FLAGS.learning_rate from this module-level name.
FLAGS = None

# Data Params
TRAIN_PERCENT = .8  # Fraction (0-1) of data to allocate to training
DATA_SEED = 48173  # Random seed used for splitting the data into train/test
MAX_LABEL = 2  # Width of the logits layer, i.e. number of output classes
MAX_DOCUMENT_LENGTH = 500  # Max length of each comment in words

# CNN parameters
# Heights of the convolution filters; the CNN builds one conv + max-pool
# branch per entry.
DEFAULT_FILTER_SIZES = [2, 3, 4, 5]

# Bag of Word parameters
# EMBEDDING_SIZE is the dimension of the learned word embedding column.
BOWParams = namedtuple("BOWParams", ["EMBEDDING_SIZE"])
BOW_PARAMS = BOWParams(EMBEDDING_SIZE=20)

WORDS_FEATURE = "words"  # Name of the input words feature.
MODEL_LIST = ["bag_of_words", "cnn"]  # Possible models

# Training Params
TRAIN_SEED = 9812  # Random seed used to initialize training
BATCH_SIZE = 128  # Batch size of the training input_fn


def estimator_spec_for_softmax_classification(logits, labels, mode,
                                              learning_rate):
  """Builds the EstimatorSpec for a softmax classifier in the given mode.

  Depending on the value of mode, different EstimatorSpec arguments are
  required:

  For mode == ModeKeys.TRAIN: required fields are loss and train_op.
  For mode == ModeKeys.EVAL: required field is loss.
  For mode == ModeKeys.PREDICT: required fields are predictions.

  Args:
    logits: tensor of unnormalised class scores, one row per example. The
      AUC metric reads column 1 as the positive class, so at least two
      columns are expected.
    labels: integer class ids for each example (unused in PREDICT mode).
    mode: a tf.estimator.ModeKeys value.
    learning_rate: learning rate for the Adam optimizer used in TRAIN mode.

  Returns:
    EstimatorSpec instance for softmax classification.
  """
  predicted_classes = tf.argmax(logits, axis=1)
  predicted_probs = tf.nn.softmax(logits, name="softmax_tensor")

  predictions = {
      # Holds the raw logit values
      "logits": logits,

      # Holds the class id (0,1) representing the model's prediction of the most
      # likely class for this example.
      "classes": predicted_classes,

      # Holds the probabilities for each prediction
      "probs": predicted_probs,
  }

  # Represents an output of a model that can be served.
  export_outputs = {
      "output": tf.estimator.export.ClassificationOutput(scores=predicted_probs)
  }

  # PREDICT Mode
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions=predictions, export_outputs=export_outputs)

  # Calculate loss for both TRAIN and EVAL modes
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  eval_metric_ops = {
      "accuracy":
          tf.metrics.accuracy(
              labels=labels, predictions=predicted_classes, name="acc_op"),
      # AUC needs a score in [0, 1] to sweep thresholds over. Feeding the hard
      # argmax class ids (as before) collapses the ROC curve to a single
      # operating point, so the positive-class probability is used instead.
      "auc":
          tf.metrics.auc(
              labels=labels, predictions=predicted_probs[:, 1], name="auc_op"),
  }

  # Add summary ops to the graph. These metrics will be tracked graphed
  # on each checkpoint by TensorBoard.
  tf.summary.scalar("accuracy", eval_metric_ops["accuracy"][1])
  tf.summary.scalar("auc", eval_metric_ops["auc"][1])

  # TRAIN Mode
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    # Log the loss every 50 iterations while training.
    logging_hook = tf.train.LoggingTensorHook(
        tensors={"loss": loss}, every_n_iter=50)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        training_hooks=[logging_hook],
        predictions={"loss": loss},
        export_outputs=export_outputs,
        eval_metric_ops=eval_metric_ops)

  # EVAL Mode
  assert mode == tf.estimator.ModeKeys.EVAL

  return tf.estimator.EstimatorSpec(
      mode=mode,
      loss=loss,
      predictions=predictions,
      eval_metric_ops=eval_metric_ops,
      export_outputs=export_outputs)


def get_cnn_model(embedding_size, num_filters, dropout_keep_prob):
  """Returns an Estimator model_fn for a multi-filter-size text CNN.

  Args:
    embedding_size: dimension of the learned embedding for each word id.
    num_filters: number of convolution filters per filter size.
    dropout_keep_prob: keep probability passed to tf.nn.dropout in TRAIN mode.

  Returns:
    A function cnn_model(features, labels, mode) that returns a
    tf.estimator.EstimatorSpec.
  """

  def cnn_model(features, labels, mode):
    """Builds the CNN graph; reads the module globals n_words and FLAGS."""
    filter_sizes = DEFAULT_FILTER_SIZES

    with tf.name_scope("embedding"):
      # Embedding table, randomly initialised in [-1, 1); n_words is the
      # module-level global assigned in main().
      W = tf.Variable(
          tf.random_uniform([n_words, embedding_size], -1.0, 1.0), name="W")

      embedded_chars = tf.nn.embedding_lookup(W, features[WORDS_FEATURE])
      # Append a trailing axis of size 1 so the result can be fed to conv2d
      # (the filters below have 1 input channel).
      embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
      with tf.name_scope("conv-maxpool-%s" % filter_size):

        # Convolution Layer
        filter_shape = [filter_size, embedding_size, 1, num_filters]
        W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
        conv = tf.nn.conv2d(
            embedded_chars_expanded,
            W,
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="conv")
        # Apply nonlinearity
        hh = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

        # Max-pooling over the outputs. The window covers
        # MAX_DOCUMENT_LENGTH - filter_size + 1 positions along axis 1 and a
        # single example and filter on the other axes, so each filter is
        # reduced to its maximum over the document positions.
        # NOTE(review): this assumes the input documents are exactly
        # MAX_DOCUMENT_LENGTH ids long -- confirm against the caller.
        pooled = tf.nn.max_pool(
            hh,
            ksize=[1, MAX_DOCUMENT_LENGTH - filter_size + 1, 1, 1],
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="pool")

        pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

    # Add dropout in training
    with tf.name_scope("dropout"):
      # Keep probability of 1.0 (dropout disabled) by default
      h_drop = tf.nn.dropout(h_pool_flat, 1.0)

      # Only in TRAIN mode is the configured keep probability applied.
      if mode == tf.estimator.ModeKeys.TRAIN:
        h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)

    # Add a fully connected layer to do prediction
    with tf.name_scope("output"):
      W = tf.Variable(
          tf.truncated_normal([num_filters_total, MAX_LABEL], stddev=0.1),
          name="W")
      b = tf.Variable(tf.constant(0.1, shape=[MAX_LABEL]), name="b")
      scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores")

    # The learning rate comes from the module-level FLAGS, not from the
    # arguments of get_cnn_model.
    return estimator_spec_for_softmax_classification(
        logits=scores,
        labels=labels,
        mode=mode,
        learning_rate=FLAGS.learning_rate)

  return cnn_model


def bag_of_words_model(features, labels, mode):
  """Estimator model_fn for a bag-of-words classifier with learned embeddings.

  Word order in the text is ignored. Reads the module globals n_words and
  FLAGS.

  Args:
    features: dict of input tensors; the WORDS_FEATURE entry holds word ids.
    labels: integer class ids (unused in PREDICT mode).
    mode: a tf.estimator.ModeKeys value.

  Returns:
    A tf.estimator.EstimatorSpec.
  """
  # Word ids are categorical identities in [0, n_words). Their embedding is
  # initialised randomly and trained together with the rest of the model.
  embedded_words_column = tf.feature_column.embedding_column(
      tf.feature_column.categorical_column_with_identity(
          WORDS_FEATURE, num_buckets=n_words),
      dimension=BOW_PARAMS.EMBEDDING_SIZE)

  document_vector = tf.feature_column.input_layer(
      features, feature_columns=[embedded_words_column])

  # Linear layer producing one logit per class.
  class_logits = tf.layers.dense(document_vector, MAX_LABEL, activation=None)

  return estimator_spec_for_softmax_classification(
      logits=class_logits,
      labels=labels,
      mode=mode,
      learning_rate=FLAGS.learning_rate)


def main(FLAGS):
  """Trains, evaluates and exports the model selected by FLAGS.model.

  Loads the training CSV, splits it into train and held-out test sets, trains
  an Estimator for FLAGS.train_steps steps, logs accuracy/AUC on the held-out
  set and exports a SavedModel to FLAGS.job_dir.

  Args:
    FLAGS: parsed command-line arguments (see the argparse definitions in the
      `__main__` section).

  Raises:
    ValueError: if FLAGS.model is not one of MODEL_LIST.
  """
  # The model functions read the vocabulary size from this module global.
  global n_words

  tf.logging.set_verbosity(tf.logging.INFO)

  if FLAGS.verbose:
    tf.logging.info("Running in verbose mode")
    tf.logging.set_verbosity(tf.logging.DEBUG)

  # Load and split data
  tf.logging.info("Loading data from {0}".format(FLAGS.train_data))

  data = wikidata.WikiData(
      FLAGS.train_data,
      FLAGS.y_class,
      seed=DATA_SEED,
      train_percent=TRAIN_PERCENT,
      max_document_length=MAX_DOCUMENT_LENGTH,
      char_ngrams=FLAGS.char_ngrams,
      min_frequency=FLAGS.min_frequency)

  n_words = len(data.vocab_processor.vocabulary_)
  tf.logging.info("Total words: %d" % n_words)

  # Build model
  if FLAGS.model == "bag_of_words":
    model_fn = bag_of_words_model

    # Subtract 1 because VocabularyProcessor outputs a word-id matrix where word
    # ids start from 1 and 0 means 'no word'. But categorical_column_with_identity
    # assumes 0-based count and uses -1 for missing word.
    data.x_train = data.x_train - 1
    data.x_test = data.x_test - 1
  elif FLAGS.model == "cnn":
    model_fn = get_cnn_model(FLAGS.embedding_size, FLAGS.num_filters,
                             FLAGS.dropout_keep_prob)
  else:
    # Carry the explanation in the exception as well as in the log, so a
    # caller that only sees the traceback knows what went wrong.
    message = "Unknown specified model '{}', must be one of {}".format(
        FLAGS.model, MODEL_LIST)
    tf.logging.error(message)
    raise ValueError(message)

  classifier = tf.estimator.Estimator(
      model_fn=model_fn,
      config=tf.contrib.learn.RunConfig(
          tf_random_seed=TRAIN_SEED,
          ## Uncomment to see CPU/GPU allocation in logs.
          # session_config=tf.ConfigProto(log_device_placement=True),
      ),
      model_dir=FLAGS.job_dir)

  # Train model
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: data.x_train},
      y=data.y_train,
      batch_size=BATCH_SIZE,
      num_epochs=None,  # Note: For training, set this to None, so the input_fn
      # keeps returning data until the required number of train
      # steps is reached.
      shuffle=True)
  classifier.train(input_fn=train_input_fn, steps=FLAGS.train_steps)

  # Predict on held-out test data
  test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={WORDS_FEATURE: data.x_test},
      y=data.y_test,
      num_epochs=1,  # Note: For evaluation and prediction set this to 1,
      # so the input_fn will iterate over the data once and
      # then raise OutOfRangeError
      shuffle=False)
  predicted_test = classifier.predict(input_fn=test_input_fn)
  # Keep the predicted class and the probability of class 1 for each example.
  test_out = pd.DataFrame(
      [(p["classes"], p["probs"][1]) for p in predicted_test],
      columns=["y_predicted", "prob"])

  # Score with sklearn and TensorFlow
  sklearn_score = metrics.accuracy_score(data.y_test, test_out["y_predicted"])
  tf_scores = classifier.evaluate(input_fn=test_input_fn)

  train_size = len(data.x_train)
  test_size = len(data.x_test)

  # Majority-class baseline: the share of the more frequent label in the
  # training set.
  baseline = len(data.y_train[data.y_train == 0]) / len(data.y_train)
  if baseline < .5:
    baseline = 1 - baseline

  tf.logging.info("")
  tf.logging.info("----------Evaluation on Held-Out Data---------")
  tf.logging.info("Train Size: {0} Test Size: {1}".format(
      train_size, test_size))
  tf.logging.info("Baseline (class distribution): {0:f}".format(baseline))
  tf.logging.info("Accuracy (sklearn): {0:f}".format(sklearn_score))

  for key in sorted(tf_scores):
    tf.logging.info("%s: %s" % (key, tf_scores[key]))

  # Export the model
  feature_spec = {
      WORDS_FEATURE:
          tf.FixedLenFeature(dtype=tf.int64, shape=MAX_DOCUMENT_LENGTH)
  }
  serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
      feature_spec)

  classifier.export_savedmodel(FLAGS.job_dir, serving_input_fn)


# Script entry point: define the command-line flags, store the parsed result
# in the module-level FLAGS (the model functions read it) and run main().
if __name__ == "__main__":

  parser = argparse.ArgumentParser()
  parser.add_argument(
      "--verbose", help="Run in verbose mode.", action="store_true")
  parser.add_argument(
      "--train_data", type=str, default="", help="Path to the training data.")
  parser.add_argument(
      "--y_class",
      type=str,
      default="toxic",
      # The help text used to list the model names here; the valid values are
      # the label columns known to wikidata.
      help="Class to train model against, one of {}".format(
          wikidata.Y_CLASSES))
  parser.add_argument(
      "--model",
      type=str,
      default="bag_of_words",
      help="The model to train, one of {}".format(MODEL_LIST))
  parser.add_argument(
      "--train_steps",
      type=int,
      default=100,
      help="The number of steps to train the model")
  parser.add_argument(
      "--embedding_size",
      type=int,
      default=50,
      help="The size of the word embedding")
  parser.add_argument(
      "--dropout_keep_prob",
      type=float,
      default=0.75,
      help="The dropout keep probability")
  parser.add_argument(
      "--num_filters",
      type=int,
      default=10,
      help="The number of filters in each size")
  # Spelled with a dash; argparse exposes it as FLAGS.job_dir.
  parser.add_argument(
      "--job-dir",
      type=str,
      default="",
      help="The directory where the job is staged")
  parser.add_argument(
      "--char_ngrams",
      type=int,
      default=0,
      help="Size of overlapping character ngrams to split into, use words if 0")
  parser.add_argument(
      "--learning_rate",
      type=float,
      default=0.01,
      help="The model learning rate")
  parser.add_argument(
      "--min_frequency",
      type=int,
      default=0,
      help="Minimum count for tokens passed to VocabularyProcessor")

  FLAGS = parser.parse_args()

  main(FLAGS)


================================================
FILE: kaggle-classification/trainer/wikidata.py
================================================
"""Class to encapsulate training and test data."""

import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn
from sklearn.model_selection import train_test_split

# Label columns that may be selected as the prediction target; WikiData._split
# rejects any y_class that is not in this list.
Y_CLASSES = [
    'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
]


def ngrams(sentence, ngram_size):
  """Converts a string into a list of ngrams of characters.

  ngrams('abra cadabra', 5) =
    [('a', 'b', 'r', 'a', ' '), ('b', 'r', 'a', ' ', 'c'), ...
     ('a', 'd', 'a', 'b', 'r'), ('d', 'a', 'b', 'r', 'a')]

  Args:
    sentence: the text (any iterable of characters) to split.
    ngram_size: number of characters per ngram.

  Returns:
    A list of tuples, each holding ngram_size consecutive characters. The list
    is empty when the sentence is shorter than ngram_size or ngram_size is 0.
  """
  chars = list(sentence)
  # Zip the character list against itself shifted by 0..ngram_size-1; zip stops
  # at the shortest shift, which yields exactly the full-length ngrams.
  # Materialised with list() because zip returns a one-shot iterator on
  # Python 3, while the documented result is a list.
  return list(zip(*[chars[i:] for i in range(ngram_size)]))


class WikiData:
  """Loads a labelled comments CSV and exposes train/test splits.

  After construction the instance holds the raw text (x_train_text,
  x_test_text), the word-id arrays produced by the VocabularyProcessor
  (x_train, x_test), the label arrays (y_train, y_test) and the
  vocab_processor itself. In test mode x_train stays None.
  """

  def __init__(self,
               data_path,
               y_class,
               max_document_length,
               vocab_processor_path=None,
               test_mode=False,
               seed=None,
               train_percent=None,
               char_ngrams=None,
               min_frequency=None):
    """Args:

      * data_path (string): path to file containing train or test data
      * y_class (string): the class we're training or testing on
      * max_document_length (integer): document length handed to a newly
      created VocabularyProcessor
      * vocab_processor_path (string): if provided, the comment_text data will
      be processed with the vocab processor at that location. If not, a new
      vocab_processor will be created using the training data.
      * test_mode (boolean): true if loading data just to test on, not training
      a model
      * seed (integer): a random seed to use for data splitting
      * train_percent (float): the fraction of data we should use for training
      data; required unless test_mode is True
      * char_ngrams (integer): if truthy, tokenize into overlapping character
      ngrams of this size instead of using the default tokenizer
      * min_frequency (integer): minimum token count handed to a newly created
      VocabularyProcessor; None is treated as 0

    Raises:
      ValueError: if y_class or train_percent is invalid, or if test_mode is
      True and no vocab_processor_path is given.
    """
    data = self._load_csv(data_path)

    self.x_train, self.x_train_text = None, None
    self.x_test, self.x_test_text = None, None
    self.y_train = None
    self.y_test = None
    self.vocab_processor = None

    # If test_mode is True, then put all the data in x_test and y_test
    if test_mode:
      train_percent = 0

    # Split the data into test / train sets
    self.x_train_text, self.x_test_text, self.y_train, self.y_test \
      = self._split(data, train_percent, 'comment_text', y_class, seed)

    # Either load a VocabularyProcessor or compute one from the training data
    if test_mode:

      # If test_mode is True and no vocab_processor_path is specified, then
      # return an error. We shouldn't train a VocabProcessor at test time.
      if vocab_processor_path is None:
        message = 'Loading data in test_mode with no vocab_processor_path'
        tf.logging.error(message)
        raise ValueError(message)

      # The loader is defined as _load_vocab_processor; the previous call used
      # a name without the underscore and failed with AttributeError.
      self.vocab_processor = self._load_vocab_processor(vocab_processor_path)

    else:
      tokenizer_fn = None
      if char_ngrams:
        tokenizer_fn = lambda iterator: (
            ngrams(x, char_ngrams) for x in iterator)
      self.vocab_processor = tflearn.data_utils.VocabularyProcessor(
          max_document_length=max_document_length,
          # The signature default is None; pass 0 instead.
          # NOTE(review): presumably the processor compares this value
          # numerically, which would fail on None -- confirm against tflearn.
          min_frequency=min_frequency or 0,
          tokenizer_fn=tokenizer_fn)
      self.x_train = np.array(
          list(self.vocab_processor.fit_transform(self.x_train_text)))

    # Apply the VocabularyProcessor to the test data
    self.x_test = np.array(
        list(self.vocab_processor.transform(self.x_test_text)))

  def _load_vocab_processor(self, path):
    """Load a VocabularyProcessor from the provided path"""
    return tflearn.data_utils.VocabularyProcessor.restore(path)

  def _load_csv(self, path):
    """Reads CSV from specified location and returns the data as a Pandas Dataframe.

    The file is opened through tf.gfile, so a Cloud Storage path, e.g.
    'gs://<bucket>/<blob>', or a local path can be given.

    Assumes data can fit into memory.
    """
    with tf.gfile.Open(path, 'rb') as fileobj:
      df = pd.read_csv(fileobj, encoding='utf-8')

    return df

  def _split(self, data, train_percent, x_field, y_class, seed=None):
    """Split divides the Wikipedia data into test and train subsets.

    Args:
      * data (dataframe): a dataframe with data for 'comment_text' and y_class
      * train_percent (float): the fraction of data to use for training, in
        [0, 1]. 0 puts every row in the test set and 1 puts every row in the
        train set; in both cases the rows keep their original order.
      * x_field (string): attribute of the wiki data to use to train, e.g.
        'comment_text'
      * y_class (string): attribute of the wiki data to predict, e.g. 'toxic'
      * seed (integer): a seed to use to split the data in a reproducible way

    Returns:
      A tuple (x_train, x_test, y_train, y_test):
      x_train (series): the text from each train example
      x_test (series):  the text from each test example
      y_train (array): the 0 or 1 labels for the training data
      y_test (array):  the 0 or 1 labels for the test data

    Raises:
      ValueError: if y_class is not in Y_CLASSES or train_percent is missing
        or outside [0, 1].
    """

    if y_class not in Y_CLASSES:
      message = 'Specified y_class {0} not in list of possible classes {1}'\
            .format(y_class, Y_CLASSES)
      tf.logging.error(message)
      raise ValueError(message)

    # None cannot be compared with numbers on Python 3, so reject it
    # explicitly with the same error as an out-of-range value.
    if train_percent is None or train_percent > 1 or train_percent < 0:
      message = 'Specified train_percent {0} is not between 0 and 1'\
            .format(train_percent)
      tf.logging.error(message)
      raise ValueError(message)

    X = data[x_field]
    y = data[y_class]

    # The boundary fractions are handled here instead of by train_test_split:
    # with train_percent == 0 the expression `1 - train_percent` is the
    # integer 1, which train_test_split reads as "one test sample" rather than
    # "everything", and a test size of 0 is likewise not a usable fraction.
    if train_percent == 0:
      return X.iloc[:0], X, np.array(y.iloc[:0]), np.array(y)
    if train_percent == 1:
      return X, X.iloc[:0], np.array(y), np.array(y.iloc[:0])

    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=1 - train_percent, random_state=seed)

    return x_train, x_test, np.array(y_train), np.array(y_test)


================================================
FILE: model_evaluation/BiosBias Evaluation.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "from IPython.display import display\n",
    "import json\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import random\n",
    "import re\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn.metrics as metrics\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read scored test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "standard_data_path = 'gs://conversationai-models/biosbias/scored_data/test_standard_0409.csv'\n",
    "scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_scrubbed_0409.csv'\n",
    "very_scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_very_scrubbed_0409.csv'\n",
    "gender_data_path = 'gs://conversationai-models/biosbias/scored_data/test_data_gender.csv'\n",
    "\n",
    "\n",
    "perf_df = pd.read_csv(tf.gfile.Open(standard_data_path)).drop_duplicates(subset=['tokens'])\n",
    "scrubbed_df = pd.read_csv(tf.gfile.Open(scrubbed_data_path)).drop_duplicates(subset=['tokens'])\n",
    "very_scrubbed_df = pd.read_csv(tf.gfile.Open(very_scrubbed_data_path)).drop_duplicates(subset=['tokens'])\n",
    "gender_df = pd.read_csv(tf.gfile.Open(gender_data_path)).drop_duplicates(subset=['tokens'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59824, 300)\n",
      "(59820, 36)\n"
     ]
    }
   ],
   "source": [
    "print(perf_df.shape)\n",
    "print(scrubbed_df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = perf_df.join(scrubbed_df, rsuffix = '_scrubbed')\n",
    "df = df.join(very_scrubbed_df, rsuffix = '_very_scrubbed')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tokens</th>\n",
       "      <th>gender</th>\n",
       "      <th>label</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6</th>\n",
       "      <th>...</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[u'he', u'is', u'currently', u'working', u'clo...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "      <td>0.000008</td>\n",
       "      <td>4.625991e-14</td>\n",
       "      <td>0.000089</td>\n",
       "      <td>0.000432</td>\n",
       "      <td>2.642943e-04</td>\n",
       "      <td>1.613340e-07</td>\n",
       "      <td>4.687537e-07</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001929</td>\n",
       "      <td>1.914383e-06</td>\n",
       "      <td>0.000097</td>\n",
       "      <td>0.000332</td>\n",
       "      <td>7.086468e-07</td>\n",
       "      <td>8.798547e-16</td>\n",
       "      <td>0.000041</td>\n",
       "      <td>0.000395</td>\n",
       "      <td>0.000054</td>\n",
       "      <td>8.315536e-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[u'she', u'has', u'a', u'passion', u'for', u'w...</td>\n",
       "      <td>F</td>\n",
       "      <td>26</td>\n",
       "      <td>0.000001</td>\n",
       "      <td>5.970340e-18</td>\n",
       "      <td>0.000004</td>\n",
       "      <td>0.000155</td>\n",
       "      <td>8.439872e-06</td>\n",
       "      <td>1.380430e-07</td>\n",
       "      <td>8.653511e-09</td>\n",
       "      <td>...</td>\n",
       "      <td>0.013356</td>\n",
       "      <td>7.866625e-01</td>\n",
       "      <td>0.009269</td>\n",
       "      <td>0.024264</td>\n",
       "      <td>3.710595e-04</td>\n",
       "      <td>2.425320e-11</td>\n",
       "      <td>0.004488</td>\n",
       "      <td>0.002426</td>\n",
       "      <td>0.032467</td>\n",
       "      <td>1.274749e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[u'growing', u'up', u'under', u'the', u'influe...</td>\n",
       "      <td>M</td>\n",
       "      <td>22</td>\n",
       "      <td>0.000205</td>\n",
       "      <td>1.023775e-15</td>\n",
       "      <td>0.008020</td>\n",
       "      <td>0.000054</td>\n",
       "      <td>1.159827e-06</td>\n",
       "      <td>2.420847e-06</td>\n",
       "      <td>4.043094e-06</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000135</td>\n",
       "      <td>8.046401e-04</td>\n",
       "      <td>0.002173</td>\n",
       "      <td>0.000697</td>\n",
       "      <td>3.003297e-05</td>\n",
       "      <td>8.979249e-14</td>\n",
       "      <td>0.001901</td>\n",
       "      <td>0.000097</td>\n",
       "      <td>0.001727</td>\n",
       "      <td>4.318769e-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[u'he', u'earned', u'his', u'beng', u'degree',...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "      <td>0.000009</td>\n",
       "      <td>1.354895e-13</td>\n",
       "      <td>0.001508</td>\n",
       "      <td>0.000051</td>\n",
       "      <td>1.071294e-07</td>\n",
       "      <td>1.333064e-08</td>\n",
       "      <td>1.857020e-05</td>\n",
       "      <td>...</td>\n",
       "      <td>0.009217</td>\n",
       "      <td>1.700057e-02</td>\n",
       "      <td>0.136035</td>\n",
       "      <td>0.009581</td>\n",
       "      <td>2.460610e-03</td>\n",
       "      <td>1.396903e-09</td>\n",
       "      <td>0.002276</td>\n",
       "      <td>0.009811</td>\n",
       "      <td>0.026841</td>\n",
       "      <td>1.840305e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[u'her', u'professional', u'and', u'educationa...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "      <td>0.001034</td>\n",
       "      <td>6.887217e-12</td>\n",
       "      <td>0.000701</td>\n",
       "      <td>0.021189</td>\n",
       "      <td>1.852501e-03</td>\n",
       "      <td>6.723991e-05</td>\n",
       "      <td>7.880444e-06</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000425</td>\n",
       "      <td>9.174340e-08</td>\n",
       "      <td>0.995151</td>\n",
       "      <td>0.001635</td>\n",
       "      <td>9.952086e-11</td>\n",
       "      <td>4.422046e-14</td>\n",
       "      <td>0.000974</td>\n",
       "      <td>0.000039</td>\n",
       "      <td>0.000482</td>\n",
       "      <td>1.483144e-07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 372 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              tokens gender  label  \\\n",
       "0  [u'he', u'is', u'currently', u'working', u'clo...      M     25   \n",
       "1  [u'she', u'has', u'a', u'passion', u'for', u'w...      F     26   \n",
       "2  [u'growing', u'up', u'under', u'the', u'influe...      M     22   \n",
       "3  [u'he', u'earned', u'his', u'beng', u'degree',...      M     25   \n",
       "4  [u'her', u'professional', u'and', u'educationa...      F     25   \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0  \\\n",
       "0                                           0.000008                           \n",
       "1                                           0.000001                           \n",
       "2                                           0.000205                           \n",
       "3                                           0.000009                           \n",
       "4                                           0.001034                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1  \\\n",
       "0                                       4.625991e-14                           \n",
       "1                                       5.970340e-18                           \n",
       "2                                       1.023775e-15                           \n",
       "3                                       1.354895e-13                           \n",
       "4                                       6.887217e-12                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2  \\\n",
       "0                                           0.000089                           \n",
       "1                                           0.000004                           \n",
       "2                                           0.008020                           \n",
       "3                                           0.001508                           \n",
       "4                                           0.000701                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3  \\\n",
       "0                                           0.000432                           \n",
       "1                                           0.000155                           \n",
       "2                                           0.000054                           \n",
       "3                                           0.000051                           \n",
       "4                                           0.021189                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4  \\\n",
       "0                                       2.642943e-04                           \n",
       "1                                       8.439872e-06                           \n",
       "2                                       1.159827e-06                           \n",
       "3                                       1.071294e-07                           \n",
       "4                                       1.852501e-03                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5  \\\n",
       "0                                       1.613340e-07                           \n",
       "1                                       1.380430e-07                           \n",
       "2                                       2.420847e-06                           \n",
       "3                                       1.333064e-08                           \n",
       "4                                       6.723991e-05                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6  \\\n",
       "0                                       4.687537e-07                           \n",
       "1                                       8.653511e-09                           \n",
       "2                                       4.043094e-06                           \n",
       "3                                       1.857020e-05                           \n",
       "4                                       7.880444e-06                           \n",
       "\n",
       "                                      ...                                      \\\n",
       "0                                     ...                                       \n",
       "1                                     ...                                       \n",
       "2                                     ...                                       \n",
       "3                                     ...                                       \n",
       "4                                     ...                                       \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23  \\\n",
       "0                                           0.001929                            \n",
       "1                                           0.013356                            \n",
       "2                                           0.000135                            \n",
       "3                                           0.009217                            \n",
       "4                                           0.000425                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24  \\\n",
       "0                                       1.914383e-06                            \n",
       "1                                       7.866625e-01                            \n",
       "2                                       8.046401e-04                            \n",
       "3                                       1.700057e-02                            \n",
       "4                                       9.174340e-08                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25  \\\n",
       "0                                           0.000097                            \n",
       "1                                           0.009269                            \n",
       "2                                           0.002173                            \n",
       "3                                           0.136035                            \n",
       "4                                           0.995151                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26  \\\n",
       "0                                           0.000332                            \n",
       "1                                           0.024264                            \n",
       "2                                           0.000697                            \n",
       "3                                           0.009581                            \n",
       "4                                           0.001635                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27  \\\n",
       "0                                       7.086468e-07                            \n",
       "1                                       3.710595e-04                            \n",
       "2                                       3.003297e-05                            \n",
       "3                                       2.460610e-03                            \n",
       "4                                       9.952086e-11                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28  \\\n",
       "0                                       8.798547e-16                            \n",
       "1                                       2.425320e-11                            \n",
       "2                                       8.979249e-14                            \n",
       "3                                       1.396903e-09                            \n",
       "4                                       4.422046e-14                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29  \\\n",
       "0                                           0.000041                            \n",
       "1                                           0.004488                            \n",
       "2                                           0.001901                            \n",
       "3                                           0.002276                            \n",
       "4                                           0.000974                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30  \\\n",
       "0                                           0.000395                            \n",
       "1                                           0.002426                            \n",
       "2                                           0.000097                            \n",
       "3                                           0.009811                            \n",
       "4                                           0.000039                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31  \\\n",
       "0                                           0.000054                            \n",
       "1                                           0.032467                            \n",
       "2                                           0.001727                            \n",
       "3                                           0.026841                            \n",
       "4                                           0.000482                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32  \n",
       "0                                       8.315536e-08                           \n",
       "1                                       1.274749e-04                           \n",
       "2                                       4.318769e-06                           \n",
       "3                                       1.840305e-04                           \n",
       "4                                       1.483144e-07                           \n",
       "\n",
       "[5 rows x 372 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(59824, 372)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59753, 372)\n"
     ]
    }
   ],
   "source": [
    "df = df.dropna()\n",
    "print(df.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_class_from_col_name(col_name):\n",
    "    \"\"\"Return the integer class index encoded at the end of a column name.\n",
    "\n",
    "    The name must end in ``_<digits>``; for example ``'some_model_12'``\n",
    "    yields ``12``.\n",
    "\n",
    "    Args:\n",
    "        col_name: Column name string to parse.\n",
    "\n",
    "    Returns:\n",
    "        The trailing digits of ``col_name`` converted to ``int``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``col_name`` does not end in ``_<digits>``.\n",
    "    \"\"\"\n",
    "    pattern = r'^.*_(\\d+)$'\n",
    "    match = re.search(pattern, col_name)\n",
    "    if match is None:\n",
    "        # re.search returns None on no match; report the offending name\n",
    "        # instead of failing with an opaque AttributeError on None.group.\n",
    "        raise ValueError(\n",
    "            'No trailing class index in column name: {!r}'.format(col_name))\n",
    "    return int(match.group(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_best_class(df, model_name, class_names):\n",
    "    \"\"\"Add a ``<model_name>_class`` column with each row's top-scoring class.\n",
    "\n",
    "    For every row, the ``<model_name>_<class_name>`` column holding the\n",
    "    largest value is located, and the result of passing that column's name\n",
    "    to ``get_class_from_col_name`` is stored.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame containing one ``<model_name>_<class_name>`` column\n",
    "            per entry of ``class_names``. Modified in place.\n",
    "        model_name: Prefix shared by the columns to compare.\n",
    "        class_names: Iterable of column-name suffixes to compare.\n",
    "\n",
    "    Returns:\n",
    "        None. The result is written to ``df['<model_name>_class']``.\n",
    "    \"\"\"\n",
    "    score_columns = []\n",
    "    for suffix in class_names:\n",
    "        score_columns.append('{}_{}'.format(model_name, suffix))\n",
    "    # Name of the highest-valued column for each row.\n",
    "    winning_columns = df[score_columns].idxmax(axis=1)\n",
    "    best_classes = winning_columns.apply(get_class_from_col_name)\n",
    "    df['{}_class'.format(model_name)] = best_classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['tokens', 'gender', 'label',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_32',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_32',\n",
       "       'tokens_scrubbed', 'gender_scrubbed', 'label_scrubbed',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_32',\n",
       "       'tokens_very_scrubbed', 'gender_very_scrubbed',\n",
       "       'label_very_scrubbed',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_0',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_1',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_2',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_3',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_4',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_5',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_6',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_7',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_8',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_9',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_10',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_11',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_12',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_13',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_14',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_15',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_16',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_17',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_18',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_19',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_20',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_21',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_22',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31',\n",
       "       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The model names can be checked by inspecting the column names of df:\n",
    "# df.columns.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
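    "# Maps each model version prefix (as it appears in the prediction column names of df) to a short readable name.\n",
    "# The version ids may have to be changed; they can be looked up in the experiment tracker.\n",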
    "MODEL_NAMES = {\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837': 'debiased_tolga',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941': 'debiased_biosbias',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003': 'strong_debiased_1',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019': 'strong_debiased_2',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034': 'strong_debiased_3',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055': 'strong_debiased_4',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117': 'glove',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113': 'strong_no_equalize',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131': 'strong_no_projection', \n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954': 'scrubbed',\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254': 'very_scrubbed'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "CLASS_NAMES = range(33)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _model in MODEL_NAMES:\n",
    "    find_best_class(df, _model, CLASS_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Labels for which some gender has fewer than 5 examples; the assert below requires that there are none\n",
    "bad_labels = df.groupby('label').gender.value_counts().reset_index(name = 'count').query('count < 5').label.values\n",
    "assert len(bad_labels) == 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Accuracy Calculation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy for model debiased_biosbias: 0.806972034877\n",
      "Accuracy for model very_scrubbed: 0.355915184175\n",
      "Accuracy for model debiased_tolga: 0.818921225713\n",
      "Accuracy for model strong_debiased_1: 0.817984034274\n",
      "Accuracy for model strong_no_projection: 0.806687530333\n",
      "Accuracy for model strong_debiased_2: 0.81733134738\n",
      "Accuracy for model strong_no_equalize: 0.815239402206\n",
      "Accuracy for model glove: 0.817950563152\n",
      "Accuracy for model strong_debiased_4: 0.814737335364\n",
      "Accuracy for model strong_debiased_3: 0.817599116362\n",
      "Accuracy for model scrubbed: 0.130503907754\n"
     ]
    }
   ],
   "source": [
    "accuracy_list = []\n",
    "for _model in MODEL_NAMES:\n",
    "    is_correct = (df['{}_class'.format(_model)] == df['label'])\n",
    "    _acc = sum(is_correct)/len(is_correct)\n",
    "    accuracy_list.append(_acc)\n",
    "    print ('Accuracy for model {}: {}'.format(MODEL_NAMES[_model], _acc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fairness Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _class in CLASS_NAMES:\n",
    "    df['label_{}'.format(_class)] = (df['label'] == _class)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of examples of each gender within each label (frac_female turns these counts into a ratio)\n",
    "gender_counts = df.groupby('label').gender.value_counts().reset_index(name = 'count')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def frac_female(df):\n",
    "    m_count = df[df['gender'] == \"M\"]['count'].values[0]\n",
    "    f_count = df[df['gender'] == \"F\"]['count'].values[0]\n",
    "    return {'label': df['label'].values[0], 'frac_female': f_count/(m_count+f_count)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "frac_female_df = pd.DataFrame(list(gender_counts.groupby('label', as_index = False).apply(frac_female)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_tpr(df, _class, _model, threshold = 0.5):\n",
    "    tpr = metrics.recall_score(df['label_{}'.format(_class)],\n",
    "                               df['{}_{}'.format(_model,_class)] > threshold)\n",
    "    return tpr\n",
    "    \n",
    "def compute_tpr_by_gender(df, _class, _model, threshold = 0.5):\n",
    "    tpr_m = compute_tpr(df.query('gender == \"M\"'), _class, _model, threshold)\n",
    "    tpr_f = compute_tpr(df.query('gender == \"F\"'), _class, _model, threshold)\n",
    "    return {'M': tpr_m, 'F': tpr_f}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_tpr_tnr(df, _class, _model, threshold = 0.5):\n",
    "    #cm = metrics.confusion_matrix(df['label_{}'.format(_class)],\n",
    "    #                              df['{}_{}'.format(_model,_class)] > threshold)\n",
    "    cm = pd.crosstab(df['label_{}'.format(_class)], df['{}_{}'.format(_model,_class)] > threshold)\n",
    "    #display(cm)\n",
    "    if cm.shape[0] > 1:\n",
    "        tn = cm.iloc[0,0]\n",
    "        fp = cm.iloc[0,1]\n",
    "        fn = cm.iloc[1,0]\n",
    "        tp = cm.iloc[1,1]\n",
    "        tpr = tp/(tp+fn)\n",
    "        tnr = tn/(tn+fp)\n",
    "    else:\n",
    "        tpr = 0\n",
    "        tnr = 1\n",
    "    return tpr, tnr\n",
    "\n",
    "def compute_tr_by_gender(df, _class, _model, threshold = 0.5):\n",
    "    tpr_m, tnr_m = compute_tpr_tnr(df.query('gender == \"M\"'), _class, _model, threshold)\n",
    "    tpr_f, tnr_f = compute_tpr_tnr(df.query('gender == \"F\"'), _class, _model, threshold)\n",
    "    return {'TPR_m': tpr_m, 'TPR_f': tpr_f, 'TNR_m': tnr_m, 'TNR_f': tnr_f}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _class in CLASS_NAMES:\n",
    "    for _model in MODEL_NAMES:\n",
    "        tpr_1 = compute_tpr(df, _class, _model)\n",
    "        tpr_2, _ = compute_tpr_tnr(df, _class, _model)\n",
    "        assert tpr_1 == tpr_2, '{} != {}'.format(tpr_1, tpr_2)\n",
    "        #print('{} == {}'.format(tpr_1, tpr_2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "tpr_df = pd.DataFrame()\n",
    "for _class in frac_female_df.label:\n",
    "    row = {}\n",
    "    row['label'] = _class\n",
    "    for _model, _model_type in MODEL_NAMES.items():\n",
    "        tpr, tnr = compute_tpr_tnr(df, _class, _model)\n",
    "        row['{}_tpr'.format(_model_type)] = tpr\n",
    "        row['{}_tnr'.format(_model_type)] = tnr\n",
    "        gender_trs = compute_tr_by_gender(df, _class, _model)\n",
    "        row['{}_tpr_F'.format(_model_type)] = gender_trs['TPR_f']\n",
    "        row['{}_tpr_M'.format(_model_type)] = gender_trs['TPR_m']\n",
    "        row['{}_tpr_gender_gap'.format(_model_type)] = gender_trs['TPR_f'] - gender_trs['TPR_m']\n",
    "        row['{}_tnr_F'.format(_model_type)] = gender_trs['TNR_f']\n",
    "        row['{}_tnr_M'.format(_model_type)] = gender_trs['TNR_m']\n",
    "        row['{}_tnr_gender_gap'.format(_model_type)] = gender_trs['TNR_f'] - gender_trs['TNR_m']\n",
    "    tpr_df = tpr_df.append(row, ignore_index = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "results_df = pd.merge(tpr_df, frac_female_df, on = 'label')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "TITLE_LABELS = [\n",
    "    'accountant', 'acupuncturist', 'architect', 'attorney', 'chiropractor', 'comedian', 'composer', 'dentist',\n",
    "    'dietitian', 'dj', 'filmmaker', 'interior_designer', 'journalist', 'landscape_architect', 'magician',\n",
    "    'massage_therapist', 'model', 'nurse', 'painter', 'paralegal', 'pastor', 'personal_trainer',\n",
    "    'photographer', 'physician', 'poet', 'professor', 'psychologist', 'rapper',\n",
    "    'real_estate_broker', 'software_engineer', 'surgeon', 'teacher', 'yoga_teacher']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "results_df['label_profession'] = results_df['label'].apply(lambda x: TITLE_LABELS[int(x)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>frac_female</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>frac_female</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>debiased_biosbias_tpr_gender_gap</th>\n",
       "      <td>0.829982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>very_scrubbed_tpr_gender_gap</th>\n",
       "      <td>0.458378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>debiased_tolga_tpr_gender_gap</th>\n",
       "      <td>0.824882</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_1_tpr_gender_gap</th>\n",
       "      <td>0.716922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_no_projection_tpr_gender_gap</th>\n",
       "      <td>0.709000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_2_tpr_gender_gap</th>\n",
       "      <td>0.596896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_no_equalize_tpr_gender_gap</th>\n",
       "      <td>0.772645</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glove_tpr_gender_gap</th>\n",
       "      <td>0.794059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_4_tpr_gender_gap</th>\n",
       "      <td>0.550435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_3_tpr_gender_gap</th>\n",
       "      <td>0.707174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scrubbed_tpr_gender_gap</th>\n",
       "      <td>-0.282919</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     frac_female\n",
       "frac_female                             1.000000\n",
       "debiased_biosbias_tpr_gender_gap        0.829982\n",
       "very_scrubbed_tpr_gender_gap            0.458378\n",
       "debiased_tolga_tpr_gender_gap           0.824882\n",
       "strong_debiased_1_tpr_gender_gap        0.716922\n",
       "strong_no_projection_tpr_gender_gap     0.709000\n",
       "strong_debiased_2_tpr_gender_gap        0.596896\n",
       "strong_no_equalize_tpr_gender_gap       0.772645\n",
       "glove_tpr_gender_gap                    0.794059\n",
       "strong_debiased_4_tpr_gender_gap        0.550435\n",
       "strong_debiased_3_tpr_gender_gap        0.707174\n",
       "scrubbed_tpr_gender_gap                -0.282919"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results_df[['frac_female']+['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]].corr()[['frac_female']]\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "tpr_gender_gap_cols = ['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\n",
    "tnr_gender_gap_cols = ['{}_tnr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "gender_gap_df = results_df[['label_profession', 'frac_female']+tpr_gender_gap_cols+tnr_gender_gap_cols]\n",
    "#gender_gap_df.columns = ['label_profession', 'frac_female']+['{}'.format(_model) for _model in MODEL_NAMES.values()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label_profession</th>\n",
       "      <th>frac_female</th>\n",
       "      <th>debiased_biosbias_tpr_gender_gap</th>\n",
       "      <th>very_scrubbed_tpr_gender_gap</th>\n",
       "      <th>debiased_tolga_tpr_gender_gap</th>\n",
       "      <th>strong_debiased_1_tpr_gender_gap</th>\n",
       "      <th>strong_no_projection_tpr_gender_gap</th>\n",
       "      <th>strong_debiased_2_tpr_gender_gap</th>\n",
       "      <th>strong_no_equalize_tpr_gender_gap</th>\n",
       "      <th>glove_tpr_gender_gap</th>\n",
       "      <th>...</th>\n",
       "      <th>very_scrubbed_tnr_gender_gap</th>\n",
       "      <th>debiased_tolga_tnr_gender_gap</th>\n",
       "      <th>strong_debiased_1_tnr_gender_gap</th>\n",
       "      <th>strong_no_projection_tnr_gender_gap</th>\n",
       "      <th>strong_debiased_2_tnr_gender_gap</th>\n",
       "      <th>strong_no_equalize_tnr_gender_gap</th>\n",
       "      <th>glove_tnr_gender_gap</th>\n",
       "      <th>strong_debiased_4_tnr_gender_gap</th>\n",
       "      <th>strong_debiased_3_tnr_gender_gap</th>\n",
       "      <th>scrubbed_tnr_gender_gap</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>dietitian</td>\n",
       "      <td>0.920437</td>\n",
       "      <td>0.290927</td>\n",
       "      <td>0.173878</td>\n",
       "      <td>0.297707</td>\n",
       "      <td>0.199900</td>\n",
       "      <td>0.223862</td>\n",
       "      <td>0.187072</td>\n",
       "      <td>0.250980</td>\n",
       "      <td>0.232835</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.000674</td>\n",
       "      <td>-0.002692</td>\n",
       "      <td>-0.001220</td>\n",
       "      <td>-0.001617</td>\n",
       "      <td>-0.001530</td>\n",
       "      <td>-0.002213</td>\n",
       "      <td>-0.002810</td>\n",
       "      <td>-0.001630</td>\n",
       "      <td>-0.001412</td>\n",
       "      <td>-0.000266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>nurse</td>\n",
       "      <td>0.914502</td>\n",
       "      <td>0.082735</td>\n",
       "      <td>0.013742</td>\n",
       "      <td>0.085377</td>\n",
       "      <td>0.048740</td>\n",
       "      <td>0.033271</td>\n",
       "      <td>0.025981</td>\n",
       "      <td>0.057404</td>\n",
       "      <td>0.082411</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.001686</td>\n",
       "      <td>-0.007627</td>\n",
       "      <td>-0.007427</td>\n",
       "      <td>-0.004478</td>\n",
       "      <td>-0.004807</td>\n",
       "      <td>-0.004455</td>\n",
       "      <td>-0.005866</td>\n",
       "      <td>-0.002840</td>\n",
       "      <td>-0.002707</td>\n",
       "      <td>-0.001573</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>paralegal</td>\n",
       "      <td>0.866109</td>\n",
       "      <td>0.375755</td>\n",
       "      <td>0.094656</td>\n",
       "      <td>0.317482</td>\n",
       "      <td>0.262077</td>\n",
       "      <td>0.256944</td>\n",
       "      <td>0.271437</td>\n",
       "      <td>0.314915</td>\n",
       "      <td>0.271437</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000247</td>\n",
       "      <td>-0.000103</td>\n",
       "      <td>0.000095</td>\n",
       "      <td>-0.000012</td>\n",
       "      <td>0.000075</td>\n",
       "      <td>-0.000219</td>\n",
       "      <td>-0.000164</td>\n",
       "      <td>-0.000060</td>\n",
       "      <td>-0.000236</td>\n",
       "      <td>0.000195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>yoga_teacher</td>\n",
       "      <td>0.858696</td>\n",
       "      <td>0.276534</td>\n",
       "      <td>0.005518</td>\n",
       "      <td>0.143784</td>\n",
       "      <td>0.208049</td>\n",
       "      <td>0.116196</td>\n",
       "      <td>0.195067</td>\n",
       "      <td>0.161636</td>\n",
       "      <td>0.208374</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000535</td>\n",
       "      <td>-0.001455</td>\n",
       "      <td>-0.001289</td>\n",
       "      <td>-0.000758</td>\n",
       "      <td>-0.001393</td>\n",
       "      <td>-0.001211</td>\n",
       "      <td>-0.001211</td>\n",
       "      <td>-0.001081</td>\n",
       "      <td>-0.001399</td>\n",
       "      <td>0.000299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>model</td>\n",
       "      <td>0.818988</td>\n",
       "      <td>0.480652</td>\n",
       "      <td>0.176120</td>\n",
       "      <td>0.544309</td>\n",
       "      <td>0.418456</td>\n",
       "      <td>0.460211</td>\n",
       "      <td>0.455824</td>\n",
       "      <td>0.532551</td>\n",
       "      <td>0.505093</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.001022</td>\n",
       "      <td>-0.000566</td>\n",
       "      <td>0.000379</td>\n",
       "      <td>0.000429</td>\n",
       "      <td>-0.000039</td>\n",
       "      <td>-0.000513</td>\n",
       "      <td>-0.001008</td>\n",
       "      <td>0.000249</td>\n",
       "      <td>0.000181</td>\n",
       "      <td>0.001161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>interior_designer</td>\n",
       "      <td>0.782609</td>\n",
       "      <td>0.182716</td>\n",
       "      <td>-0.013580</td>\n",
       "      <td>0.243210</td>\n",
       "      <td>0.081481</td>\n",
       "      <td>0.096296</td>\n",
       "      <td>0.041975</td>\n",
       "      <td>0.224691</td>\n",
       "      <td>0.270370</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000226</td>\n",
       "      <td>-0.000884</td>\n",
       "      <td>0.000032</td>\n",
       "      <td>-0.000023</td>\n",
       "      <td>0.000024</td>\n",
       "      <td>-0.000676</td>\n",
       "      <td>-0.000201</td>\n",
       "      <td>0.000216</td>\n",
       "      <td>-0.000013</td>\n",
       "      <td>0.000407</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>psychologist</td>\n",
       "      <td>0.620751</td>\n",
       "      <td>0.000799</td>\n",
       "      <td>0.008890</td>\n",
       "      <td>0.045876</td>\n",
       "      <td>0.043524</td>\n",
       "      <td>0.045169</td>\n",
       "      <td>0.020219</td>\n",
       "      <td>0.042056</td>\n",
       "      <td>0.017593</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.000742</td>\n",
       "      <td>-0.005913</td>\n",
       "      <td>-0.004535</td>\n",
       "      <td>-0.002672</td>\n",
       "      <td>-0.002096</td>\n",
       "      <td>-0.004275</td>\n",
       "      <td>-0.002278</td>\n",
       "      <td>-0.003760</td>\n",
       "      <td>-0.002820</td>\n",
       "      <td>-0.001450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>teacher</td>\n",
       "      <td>0.604382</td>\n",
       "      <td>0.111221</td>\n",
       "      <td>0.025352</td>\n",
       "      <td>0.129299</td>\n",
       "      <td>0.111760</td>\n",
       "      <td>0.113756</td>\n",
       "      <td>0.114246</td>\n",
       "      <td>0.119168</td>\n",
       "      <td>0.137121</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.000813</td>\n",
       "      <td>-0.004694</td>\n",
       "      <td>-0.002497</td>\n",
       "      <td>-0.004570</td>\n",
       "      <td>-0.001141</td>\n",
       "      <td>-0.002609</td>\n",
       "      <td>-0.002664</td>\n",
       "      <td>-0.002461</td>\n",
       "      <td>-0.001785</td>\n",
       "      <td>0.000671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>journalist</td>\n",
       "      <td>0.492152</td>\n",
       "      <td>0.019865</td>\n",
       "      <td>0.010182</td>\n",
       "      <td>0.057554</td>\n",
       "      <td>0.021920</td>\n",
       "      <td>0.001790</td>\n",
       "      <td>0.013070</td>\n",
       "      <td>0.042923</td>\n",
       "      <td>0.058686</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000762</td>\n",
       "      <td>-0.000167</td>\n",
       "      <td>0.001286</td>\n",
       "      <td>0.001514</td>\n",
       "      <td>0.001955</td>\n",
       "      <td>0.000651</td>\n",
       "      <td>0.000014</td>\n",
       "      <td>0.001617</td>\n",
       "      <td>0.001571</td>\n",
       "      <td>-0.002623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>physician</td>\n",
       "      <td>0.491203</td>\n",
       "      <td>0.019845</td>\n",
       "      <td>0.036850</td>\n",
       "      <td>0.056989</td>\n",
       "      <td>0.035120</td>\n",
       "      <td>0.042554</td>\n",
       "      <td>0.040719</td>\n",
       "      <td>0.034896</td>\n",
       "      <td>0.024797</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001413</td>\n",
       "      <td>0.005790</td>\n",
       "      <td>0.006125</td>\n",
       "      <td>0.006385</td>\n",
       "      <td>0.006968</td>\n",
       "      <td>0.004761</td>\n",
       "      <td>0.007537</td>\n",
       "      <td>0.001844</td>\n",
       "      <td>0.004126</td>\n",
       "      <td>0.000307</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>poet</td>\n",
       "      <td>0.483051</td>\n",
       "      <td>-0.044163</td>\n",
       "      <td>0.009395</td>\n",
       "      <td>-0.007190</td>\n",
       "      <td>0.012207</td>\n",
       "      <td>0.006903</td>\n",
       "      <td>-0.006711</td>\n",
       "      <td>0.016393</td>\n",
       "      <td>0.001949</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000827</td>\n",
       "      <td>-0.000845</td>\n",
       "      <td>-0.001125</td>\n",
       "      <td>-0.000642</td>\n",
       "      <td>-0.000209</td>\n",
       "      <td>-0.000453</td>\n",
       "      <td>-0.000933</td>\n",
       "      <td>-0.000733</td>\n",
       "      <td>-0.000761</td>\n",
       "      <td>-0.000698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>personal_trainer</td>\n",
       "      <td>0.468293</td>\n",
       "      <td>-0.080944</td>\n",
       "      <td>-0.011850</td>\n",
       "      <td>-0.068043</td>\n",
       "      <td>0.032397</td>\n",
       "      <td>-0.028670</td>\n",
       "      <td>-0.037557</td>\n",
       "      <td>-0.091361</td>\n",
       "      <td>-0.049694</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.000783</td>\n",
       "      <td>-0.000399</td>\n",
       "      <td>-0.001005</td>\n",
       "      <td>-0.000138</td>\n",
       "      <td>-0.000473</td>\n",
       "      <td>-0.000470</td>\n",
       "      <td>-0.000456</td>\n",
       "      <td>-0.000816</td>\n",
       "      <td>-0.000737</td>\n",
       "      <td>0.000032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>professor</td>\n",
       "      <td>0.452428</td>\n",
       "      <td>-0.018119</td>\n",
       "      <td>0.011301</td>\n",
       "      <td>-0.011141</td>\n",
       "      <td>-0.015243</td>\n",
       "      <td>-0.012384</td>\n",
       "      <td>0.002382</td>\n",
       "      <td>-0.004640</td>\n",
       "      <td>-0.002251</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.001640</td>\n",
       "      <td>0.001259</td>\n",
       "      <td>0.001349</td>\n",
       "      <td>0.004071</td>\n",
       "      <td>-0.003034</td>\n",
       "      <td>-0.004298</td>\n",
       "      <td>-0.003673</td>\n",
       "      <td>-0.000717</td>\n",
       "      <td>-0.003564</td>\n",
       "      <td>-0.001995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>painter</td>\n",
       "      <td>0.452361</td>\n",
       "      <td>0.003161</td>\n",
       "      <td>0.036012</td>\n",
       "      <td>0.017337</td>\n",
       "      <td>-0.035538</td>\n",
       "      <td>0.012959</td>\n",
       "      <td>0.006991</td>\n",
       "      <td>-0.001613</td>\n",
       "      <td>-0.002095</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.000951</td>\n",
       "      <td>-0.000336</td>\n",
       "      <td>-0.000125</td>\n",
       "      <td>-0.000197</td>\n",
       "      <td>0.000173</td>\n",
       "      <td>0.000315</td>\n",
       "      <td>-0.000022</td>\n",
       "      <td>-0.000223</td>\n",
       "      <td>0.000050</td>\n",
       "      <td>0.000144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>accountant</td>\n",
       "      <td>0.374554</td>\n",
       "      <td>-0.055930</td>\n",
       "      <td>-0.031311</td>\n",
       "      <td>-0.043805</td>\n",
       "      <td>-0.025312</td>\n",
       "      <td>0.000459</td>\n",
       "      <td>-0.015143</td>\n",
       "      <td>-0.044432</td>\n",
       "      <td>-0.060287</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001341</td>\n",
       "      <td>0.000090</td>\n",
       "      <td>0.000390</td>\n",
       "      <td>0.000694</td>\n",
       "      <td>0.000483</td>\n",
       "      <td>0.000683</td>\n",
       "      <td>0.000757</td>\n",
       "      <td>0.000344</td>\n",
       "      <td>0.000355</td>\n",
       "      <td>-0.000109</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>attorney</td>\n",
       "      <td>0.367104</td>\n",
       "      <td>-0.035824</td>\n",
       "      <td>-0.003903</td>\n",
       "      <td>-0.007270</td>\n",
       "      <td>0.007254</td>\n",
       "      <td>0.013928</td>\n",
       "      <td>0.004176</td>\n",
       "      <td>-0.010897</td>\n",
       "      <td>-0.004719</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003337</td>\n",
       "      <td>-0.000622</td>\n",
       "      <td>-0.001509</td>\n",
       "      <td>-0.001953</td>\n",
       "      <td>-0.001427</td>\n",
       "      <td>-0.001875</td>\n",
       "      <td>-0.002338</td>\n",
       "      <td>-0.002469</td>\n",
       "      <td>-0.001728</td>\n",
       "      <td>-0.000642</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>photographer</td>\n",
       "      <td>0.356927</td>\n",
       "      <td>-0.052775</td>\n",
       "      <td>-0.011488</td>\n",
       "      <td>-0.036094</td>\n",
       "      <td>-0.004054</td>\n",
       "      <td>-0.017355</td>\n",
       "      <td>-0.004763</td>\n",
       "      <td>-0.035910</td>\n",
       "      <td>-0.031379</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001568</td>\n",
       "      <td>0.000615</td>\n",
       "      <td>-0.000121</td>\n",
       "      <td>-0.000051</td>\n",
       "      <td>-0.000025</td>\n",
       "      <td>0.001542</td>\n",
       "      <td>0.001537</td>\n",
       "      <td>-0.000092</td>\n",
       "      <td>0.000170</td>\n",
       "      <td>-0.000297</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>dentist</td>\n",
       "      <td>0.345824</td>\n",
       "      <td>0.009651</td>\n",
       "      <td>-0.040738</td>\n",
       "      <td>0.003124</td>\n",
       "      <td>0.013102</td>\n",
       "      <td>0.014166</td>\n",
       "      <td>0.008104</td>\n",
       "      <td>0.017242</td>\n",
       "      <td>0.015563</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000945</td>\n",
       "      <td>0.000738</td>\n",
       "      <td>0.000532</td>\n",
       "      <td>0.000574</td>\n",
       "      <td>0.000563</td>\n",
       "      <td>0.000409</td>\n",
       "      <td>0.000801</td>\n",
       "      <td>0.000516</td>\n",
       "      <td>0.000359</td>\n",
       "      <td>0.000283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>filmmaker</td>\n",
       "      <td>0.322148</td>\n",
       "      <td>-0.005893</td>\n",
       "      <td>-0.023485</td>\n",
       "      <td>-0.017356</td>\n",
       "      <td>0.038690</td>\n",
       "      <td>0.032797</td>\n",
       "      <td>0.018358</td>\n",
       "      <td>-0.019507</td>\n",
       "      <td>-0.001827</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000127</td>\n",
       "      <td>0.002068</td>\n",
       "      <td>0.001236</td>\n",
       "      <td>0.001535</td>\n",
       "      <td>0.001641</td>\n",
       "      <td>0.001847</td>\n",
       "      <td>0.002094</td>\n",
       "      <td>0.001204</td>\n",
       "      <td>0.000814</td>\n",
       "      <td>0.002261</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chiropractor</td>\n",
       "      <td>0.298824</td>\n",
       "      <td>-0.025604</td>\n",
       "      <td>-0.004360</td>\n",
       "      <td>-0.073746</td>\n",
       "      <td>-0.023146</td>\n",
       "      <td>0.024071</td>\n",
       "      <td>-0.021350</td>\n",
       "      <td>-0.011547</td>\n",
       "      <td>-0.028457</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.000152</td>\n",
       "      <td>0.000921</td>\n",
       "      <td>0.000345</td>\n",
       "      <td>0.000175</td>\n",
       "      <td>0.000617</td>\n",
       "      <td>0.000419</td>\n",
       "      <td>0.000127</td>\n",
       "      <td>0.000096</td>\n",
       "      <td>0.000309</td>\n",
       "      <td>0.000236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>pastor</td>\n",
       "      <td>0.229282</td>\n",
       "      <td>-0.274172</td>\n",
       "      <td>-0.069785</td>\n",
       "      <td>-0.259533</td>\n",
       "      <td>-0.096731</td>\n",
       "      <td>-0.127909</td>\n",
       "      <td>-0.156583</td>\n",
       "      <td>-0.218206</td>\n",
       "      <td>-0.166127</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.000073</td>\n",
       "      <td>0.001051</td>\n",
       "      <td>0.000741</td>\n",
       "      <td>0.000602</td>\n",
       "      <td>0.000453</td>\n",
       "      <td>0.001137</td>\n",
       "      <td>0.001293</td>\n",
       "      <td>0.000333</td>\n",
       "      <td>0.000333</td>\n",
       "      <td>0.000199</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>architect</td>\n",
       "      <td>0.225399</td>\n",
       "      <td>0.053551</td>\n",
       "      <td>0.003069</td>\n",
       "      <td>0.003208</td>\n",
       "      <td>0.106769</td>\n",
       "      <td>0.110808</td>\n",
       "      <td>0.073486</td>\n",
       "      <td>-0.005593</td>\n",
       "      <td>0.049996</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.001115</td>\n",
       "      <td>0.004353</td>\n",
       "      <td>0.001076</td>\n",
       "      <td>0.001065</td>\n",
       "      <td>0.002815</td>\n",
       "      <td>0.005941</td>\n",
       "      <td>0.002935</td>\n",
       "      <td>0.002423</td>\n",
       "      <td>0.003652</td>\n",
       "      <td>-0.001770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>comedian</td>\n",
       "      <td>0.219457</td>\n",
       "      <td>-0.225967</td>\n",
       "      <td>-0.081757</td>\n",
       "      <td>-0.156671</td>\n",
       "      <td>-0.065501</td>\n",
       "      <td>-0.076109</td>\n",
       "      <td>-0.087733</td>\n",
       "      <td>-0.118004</td>\n",
       "      <td>-0.124757</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000234</td>\n",
       "      <td>0.000593</td>\n",
       "      <td>-0.000030</td>\n",
       "      <td>0.000366</td>\n",
       "      <td>-0.000133</td>\n",
       "      <td>0.000633</td>\n",
       "      <td>0.000753</td>\n",
       "      <td>0.000327</td>\n",
       "      <td>-0.000055</td>\n",
       "      <td>0.000581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>software_engineer</td>\n",
       "      <td>0.157746</td>\n",
       "      <td>-0.065456</td>\n",
       "      <td>0.023591</td>\n",
       "      <td>-0.056956</td>\n",
       "      <td>-0.042324</td>\n",
       "      <td>-0.060300</td>\n",
       "      <td>-0.021202</td>\n",
       "      <td>0.015468</td>\n",
       "      <td>-0.036829</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001209</td>\n",
       "      <td>0.005100</td>\n",
       "      <td>0.009260</td>\n",
       "      <td>0.007132</td>\n",
       "      <td>0.006266</td>\n",
       "      <td>0.003512</td>\n",
       "      <td>0.006474</td>\n",
       "      <td>0.004885</td>\n",
       "      <td>0.004344</td>\n",
       "      <td>-0.000074</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>surgeon</td>\n",
       "      <td>0.153592</td>\n",
       "      <td>-0.229816</td>\n",
       "      <td>-0.051839</td>\n",
       "      <td>-0.245461</td>\n",
       "      <td>-0.122859</td>\n",
       "      <td>-0.127233</td>\n",
       "      <td>-0.089205</td>\n",
       "      <td>-0.220015</td>\n",
       "      <td>-0.207968</td>\n",
       "      <td>...</td>\n",
       "      <td>0.002435</td>\n",
       "      <td>0.005888</td>\n",
       "      <td>0.004638</td>\n",
       "      <td>0.002488</td>\n",
       "      <td>0.002320</td>\n",
       "      <td>0.004059</td>\n",
       "      <td>0.005013</td>\n",
       "      <td>0.003432</td>\n",
       "      <td>0.003316</td>\n",
       "      <td>0.000101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>composer</td>\n",
       "      <td>0.153186</td>\n",
       "      <td>-0.068712</td>\n",
       "      <td>0.036272</td>\n",
       "      <td>-0.048370</td>\n",
       "      <td>-0.001737</td>\n",
       "      <td>-0.050061</td>\n",
       "      <td>-0.008452</td>\n",
       "      <td>-0.064452</td>\n",
       "      <td>-0.063849</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001154</td>\n",
       "      <td>0.001802</td>\n",
       "      <td>0.001463</td>\n",
       "      <td>0.001543</td>\n",
       "      <td>0.001389</td>\n",
       "      <td>0.001676</td>\n",
       "      <td>0.001567</td>\n",
       "      <td>0.001011</td>\n",
       "      <td>0.001245</td>\n",
       "      <td>0.002464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>dj</td>\n",
       "      <td>0.145299</td>\n",
       "      <td>-0.103824</td>\n",
       "      <td>0.099118</td>\n",
       "      <td>-0.145000</td>\n",
       "      <td>0.027647</td>\n",
       "      <td>-0.083824</td>\n",
       "      <td>0.000882</td>\n",
       "      <td>-0.178824</td>\n",
       "      <td>-0.040588</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000244</td>\n",
       "      <td>0.001133</td>\n",
       "      <td>0.000257</td>\n",
       "      <td>0.000211</td>\n",
       "      <td>0.000721</td>\n",
       "      <td>0.000564</td>\n",
       "      <td>0.000206</td>\n",
       "      <td>0.000826</td>\n",
       "      <td>0.000299</td>\n",
       "      <td>-0.000025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>rapper</td>\n",
       "      <td>0.085859</td>\n",
       "      <td>-0.138772</td>\n",
       "      <td>0.047449</td>\n",
       "      <td>-0.096198</td>\n",
       "      <td>0.017225</td>\n",
       "      <td>0.030224</td>\n",
       "      <td>0.175496</td>\n",
       "      <td>0.019175</td>\n",
       "      <td>-0.012350</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000577</td>\n",
       "      <td>0.000155</td>\n",
       "      <td>0.000631</td>\n",
       "      <td>0.000491</td>\n",
       "      <td>0.000268</td>\n",
       "      <td>0.000834</td>\n",
       "      <td>0.001200</td>\n",
       "      <td>0.000645</td>\n",
       "      <td>0.000180</td>\n",
       "      <td>-0.000048</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>28 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     label_profession  frac_female  debiased_biosbias_tpr_gender_gap  \\\n",
       "7           dietitian     0.920437                          0.290927   \n",
       "13              nurse     0.914502                          0.082735   \n",
       "15          paralegal     0.866109                          0.375755   \n",
       "27       yoga_teacher     0.858696                          0.276534   \n",
       "12              model     0.818988                          0.480652   \n",
       "10  interior_designer     0.782609                          0.182716   \n",
       "22       psychologist     0.620751                          0.000799   \n",
       "26            teacher     0.604382                          0.111221   \n",
       "11         journalist     0.492152                          0.019865   \n",
       "19          physician     0.491203                          0.019845   \n",
       "20               poet     0.483051                         -0.044163   \n",
       "17   personal_trainer     0.468293                         -0.080944   \n",
       "21          professor     0.452428                         -0.018119   \n",
       "14            painter     0.452361                          0.003161   \n",
       "0          accountant     0.374554                         -0.055930   \n",
       "2            attorney     0.367104                         -0.035824   \n",
       "18       photographer     0.356927                         -0.052775   \n",
       "6             dentist     0.345824                          0.009651   \n",
       "9           filmmaker     0.322148                         -0.005893   \n",
       "3        chiropractor     0.298824                         -0.025604   \n",
       "16             pastor     0.229282                         -0.274172   \n",
       "1           architect     0.225399                          0.053551   \n",
       "4            comedian     0.219457                         -0.225967   \n",
       "24  software_engineer     0.157746                         -0.065456   \n",
       "25            surgeon     0.153592                         -0.229816   \n",
       "5            composer     0.153186                         -0.068712   \n",
       "8                  dj     0.145299                         -0.103824   \n",
       "23             rapper     0.085859                         -0.138772   \n",
       "\n",
       "    very_scrubbed_tpr_gender_gap  debiased_tolga_tpr_gender_gap  \\\n",
       "7                       0.173878                       0.297707   \n",
       "13                      0.013742                       0.085377   \n",
       "15                      0.094656                       0.317482   \n",
       "27                      0.005518                       0.143784   \n",
       "12                      0.176120                       0.544309   \n",
       "10                     -0.013580                       0.243210   \n",
       "22                      0.008890                       0.045876   \n",
       "26                      0.025352                       0.129299   \n",
       "11                      0.010182                       0.057554   \n",
       "19                      0.036850                       0.056989   \n",
       "20                      0.009395                      -0.007190   \n",
       "17                     -0.011850                      -0.068043   \n",
       "21                      0.011301                      -0.011141   \n",
       "14                      0.036012                       0.017337   \n",
       "0                      -0.031311                      -0.043805   \n",
       "2                      -0.003903                      -0.007270   \n",
       "18                     -0.011488                      -0.036094   \n",
       "6                      -0.040738                       0.003124   \n",
       "9                      -0.023485                      -0.017356   \n",
       "3                      -0.004360                      -0.073746   \n",
       "16                     -0.069785                      -0.259533   \n",
       "1                       0.003069                       0.003208   \n",
       "4                      -0.081757                      -0.156671   \n",
       "24                      0.023591                      -0.056956   \n",
       "25                     -0.051839                      -0.245461   \n",
       "5                       0.036272                      -0.048370   \n",
       "8                       0.099118                      -0.145000   \n",
       "23                      0.047449                      -0.096198   \n",
       "\n",
       "    strong_debiased_1_tpr_gender_gap  strong_no_projection_tpr_gender_gap  \\\n",
       "7                           0.199900                             0.223862   \n",
       "13                          0.048740                             0.033271   \n",
       "15                          0.262077                             0.256944   \n",
       "27                          0.208049                             0.116196   \n",
       "12                          0.418456                             0.460211   \n",
       "10                          0.081481                             0.096296   \n",
       "22                          0.043524                             0.045169   \n",
       "26                          0.111760                             0.113756   \n",
       "11                          0.021920                             0.001790   \n",
       "19                          0.035120                             0.042554   \n",
       "20                          0.012207                             0.006903   \n",
       "17                          0.032397                            -0.028670   \n",
       "21                         -0.015243                            -0.012384   \n",
       "14                         -0.035538                             0.012959   \n",
       "0                          -0.025312                             0.000459   \n",
       "2                           0.007254                             0.013928   \n",
       "18                         -0.004054                            -0.017355   \n",
       "6                           0.013102                             0.014166   \n",
       "9                           0.038690                             0.032797   \n",
       "3                          -0.023146                             0.024071   \n",
       "16                         -0.096731                            -0.127909   \n",
       "1                           0.106769                             0.110808   \n",
       "4                          -0.065501                            -0.076109   \n",
       "24                         -0.042324                            -0.060300   \n",
       "25                         -0.122859                            -0.127233   \n",
       "5                          -0.001737                            -0.050061   \n",
       "8                           0.027647                            -0.083824   \n",
       "23                          0.017225                             0.030224   \n",
       "\n",
       "    strong_debiased_2_tpr_gender_gap  strong_no_equalize_tpr_gender_gap  \\\n",
       "7                           0.187072                           0.250980   \n",
       "13                          0.025981                           0.057404   \n",
       "15                          0.271437                           0.314915   \n",
       "27                          0.195067                           0.161636   \n",
       "12                          0.455824                           0.532551   \n",
       "10                          0.041975                           0.224691   \n",
       "22                          0.020219                           0.042056   \n",
       "26                          0.114246                           0.119168   \n",
       "11                          0.013070                           0.042923   \n",
       "19                          0.040719                           0.034896   \n",
       "20                         -0.006711                           0.016393   \n",
       "17                         -0.037557                          -0.091361   \n",
       "21                          0.002382                          -0.004640   \n",
       "14                          0.006991                          -0.001613   \n",
       "0                          -0.015143                          -0.044432   \n",
       "2                           0.004176                          -0.010897   \n",
       "18                         -0.004763                          -0.035910   \n",
       "6                           0.008104                           0.017242   \n",
       "9                           0.018358                          -0.019507   \n",
       "3                          -0.021350                          -0.011547   \n",
       "16                         -0.156583                          -0.218206   \n",
       "1                           0.073486                          -0.005593   \n",
       "4                          -0.087733                          -0.118004   \n",
       "24                         -0.021202                           0.015468   \n",
       "25                         -0.089205                          -0.220015   \n",
       "5                          -0.008452                          -0.064452   \n",
       "8                           0.000882                          -0.178824   \n",
       "23                          0.175496                           0.019175   \n",
       "\n",
       "    glove_tpr_gender_gap           ...             \\\n",
       "7               0.232835           ...              \n",
       "13              0.082411           ...              \n",
       "15              0.271437           ...              \n",
       "27              0.208374           ...              \n",
       "12              0.505093           ...              \n",
       "10              0.270370           ...              \n",
       "22              0.017593           ...              \n",
       "26              0.137121           ...              \n",
       "11              0.058686           ...              \n",
       "19              0.024797           ...              \n",
       "20              0.001949           ...              \n",
       "17             -0.049694           ...              \n",
       "21             -0.002251           ...              \n",
       "14             -0.002095           ...              \n",
       "0              -0.060287           ...              \n",
       "2              -0.004719           ...              \n",
       "18             -0.031379           ...              \n",
       "6               0.015563           ...              \n",
       "9              -0.001827           ...              \n",
       "3              -0.028457           ...              \n",
       "16             -0.166127           ...              \n",
       "1               0.049996           ...              \n",
       "4              -0.124757           ...              \n",
       "24             -0.036829           ...              \n",
       "25             -0.207968           ...              \n",
       "5              -0.063849           ...              \n",
       "8              -0.040588           ...              \n",
       "23             -0.012350           ...              \n",
       "\n",
       "    very_scrubbed_tnr_gender_gap  debiased_tolga_tnr_gender_gap  \\\n",
       "7                      -0.000674                      -0.002692   \n",
       "13                     -0.001686                      -0.007627   \n",
       "15                      0.000247                      -0.000103   \n",
       "27                      0.000535                      -0.001455   \n",
       "12                     -0.001022                      -0.000566   \n",
       "10                      0.000226                      -0.000884   \n",
       "22                     -0.000742                      -0.005913   \n",
       "26                     -0.000813                      -0.004694   \n",
       "11                      0.000762                      -0.000167   \n",
       "19                      0.001413                       0.005790   \n",
       "20                      0.000827                      -0.000845   \n",
       "17                     -0.000783                      -0.000399   \n",
       "21                     -0.001640                       0.001259   \n",
       "14                     -0.000951                      -0.000336   \n",
       "0                       0.001341                       0.000090   \n",
       "2                       0.003337                      -0.000622   \n",
       "18                      0.001568                       0.000615   \n",
       "6                       0.000945                       0.000738   \n",
       "9                       0.000127                       0.002068   \n",
       "3                      -0.000152                       0.000921   \n",
       "16                     -0.000073                       0.001051   \n",
       "1                      -0.001115                       0.004353   \n",
       "4                       0.000234                       0.000593   \n",
       "24                      0.001209                       0.005100   \n",
       "25                      0.002435                       0.005888   \n",
       "5                       0.001154                       0.001802   \n",
       "8                       0.000244                       0.001133   \n",
       "23                      0.000577                       0.000155   \n",
       "\n",
       "    strong_debiased_1_tnr_gender_gap  strong_no_projection_tnr_gender_gap  \\\n",
       "7                          -0.001220                            -0.001617   \n",
       "13                         -0.007427                            -0.004478   \n",
       "15                          0.000095                            -0.000012   \n",
       "27                         -0.001289                            -0.000758   \n",
       "12                          0.000379                             0.000429   \n",
       "10                          0.000032                            -0.000023   \n",
       "22                         -0.004535                            -0.002672   \n",
       "26                         -0.002497                            -0.004570   \n",
       "11                          0.001286                             0.001514   \n",
       "19                          0.006125                             0.006385   \n",
       "20                         -0.001125                            -0.000642   \n",
       "17                         -0.001005                            -0.000138   \n",
       "21                          0.001349                             0.004071   \n",
       "14                         -0.000125                            -0.000197   \n",
       "0                           0.000390                             0.000694   \n",
       "2                          -0.001509                            -0.001953   \n",
       "18                         -0.000121                            -0.000051   \n",
       "6                           0.000532                             0.000574   \n",
       "9                           0.001236                             0.001535   \n",
       "3                           0.000345                             0.000175   \n",
       "16                          0.000741                             0.000602   \n",
       "1                           0.001076                             0.001065   \n",
       "4                          -0.000030                             0.000366   \n",
       "24                          0.009260                             0.007132   \n",
       "25                          0.004638                             0.002488   \n",
       "5                           0.001463                             0.001543   \n",
       "8                           0.000257                             0.000211   \n",
       "23                          0.000631                             0.000491   \n",
       "\n",
       "    strong_debiased_2_tnr_gender_gap  strong_no_equalize_tnr_gender_gap  \\\n",
       "7                          -0.001530                          -0.002213   \n",
       "13                         -0.004807                          -0.004455   \n",
       "15                          0.000075                          -0.000219   \n",
       "27                         -0.001393                          -0.001211   \n",
       "12                         -0.000039                          -0.000513   \n",
       "10                          0.000024                          -0.000676   \n",
       "22                         -0.002096                          -0.004275   \n",
       "26                         -0.001141                          -0.002609   \n",
       "11                          0.001955                           0.000651   \n",
       "19                          0.006968                           0.004761   \n",
       "20                         -0.000209                          -0.000453   \n",
       "17                         -0.000473                          -0.000470   \n",
       "21                         -0.003034                          -0.004298   \n",
       "14                          0.000173                           0.000315   \n",
       "0                           0.000483                           0.000683   \n",
       "2                          -0.001427                          -0.001875   \n",
       "18                         -0.000025                           0.001542   \n",
       "6                           0.000563                           0.000409   \n",
       "9                           0.001641                           0.001847   \n",
       "3                           0.000617                           0.000419   \n",
       "16                          0.000453                           0.001137   \n",
       "1                           0.002815                           0.005941   \n",
       "4                          -0.000133                           0.000633   \n",
       "24                          0.006266                           0.003512   \n",
       "25                          0.002320                           0.004059   \n",
       "5                           0.001389                           0.001676   \n",
       "8                           0.000721                           0.000564   \n",
       "23                          0.000268                           0.000834   \n",
       "\n",
       "    glove_tnr_gender_gap  strong_debiased_4_tnr_gender_gap  \\\n",
       "7              -0.002810                         -0.001630   \n",
       "13             -0.005866                         -0.002840   \n",
       "15             -0.000164                         -0.000060   \n",
       "27             -0.001211                         -0.001081   \n",
       "12             -0.001008                          0.000249   \n",
       "10             -0.000201                          0.000216   \n",
       "22             -0.002278                         -0.003760   \n",
       "26             -0.002664                         -0.002461   \n",
       "11              0.000014                          0.001617   \n",
       "19              0.007537                          0.001844   \n",
       "20             -0.000933                         -0.000733   \n",
       "17             -0.000456                         -0.000816   \n",
       "21             -0.003673                         -0.000717   \n",
       "14             -0.000022                         -0.000223   \n",
       "0               0.000757                          0.000344   \n",
       "2              -0.002338                         -0.002469   \n",
       "18              0.001537                         -0.000092   \n",
       "6               0.000801                          0.000516   \n",
       "9               0.002094                          0.001204   \n",
       "3               0.000127                          0.000096   \n",
       "16              0.001293                          0.000333   \n",
       "1               0.002935                          0.002423   \n",
       "4               0.000753                          0.000327   \n",
       "24              0.006474                          0.004885   \n",
       "25              0.005013                          0.003432   \n",
       "5               0.001567                          0.001011   \n",
       "8               0.000206                          0.000826   \n",
       "23              0.001200                          0.000645   \n",
       "\n",
       "    strong_debiased_3_tnr_gender_gap  scrubbed_tnr_gender_gap  \n",
       "7                          -0.001412                -0.000266  \n",
       "13                         -0.002707                -0.001573  \n",
       "15                         -0.000236                 0.000195  \n",
       "27                         -0.001399                 0.000299  \n",
       "12                          0.000181                 0.001161  \n",
       "10                         -0.000013                 0.000407  \n",
       "22                         -0.002820                -0.001450  \n",
       "26                         -0.001785                 0.000671  \n",
       "11                          0.001571                -0.002623  \n",
       "19                          0.004126                 0.000307  \n",
       "20                         -0.000761                -0.000698  \n",
       "17                         -0.000737                 0.000032  \n",
       "21                         -0.003564                -0.001995  \n",
       "14                          0.000050                 0.000144  \n",
       "0                           0.000355                -0.000109  \n",
       "2                          -0.001728                -0.000642  \n",
       "18                          0.000170                -0.000297  \n",
       "6                           0.000359                 0.000283  \n",
       "9                           0.000814                 0.002261  \n",
       "3                           0.000309                 0.000236  \n",
       "16                          0.000333                 0.000199  \n",
       "1                           0.003652                -0.001770  \n",
       "4                          -0.000055                 0.000581  \n",
       "24                          0.004344                -0.000074  \n",
       "25                          0.003316                 0.000101  \n",
       "5                           0.001245                 0.002464  \n",
       "8                           0.000299                -0.000025  \n",
       "23                          0.000180                -0.000048  \n",
       "\n",
       "[28 rows x 24 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gender_gap_df.sort_values('frac_female', ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fraction of rows of df where the improved model's absolute gap\n",
    "# is no larger than the baseline's (ties count as improved)\n",
    "\n",
    "def compute_fraction_improved(df, baseline_model, improved_model):\n",
    "    is_improved = np.abs(df[baseline_model]) >= np.abs(df[improved_model])\n",
    "    return np.mean(is_improved)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "debiased_biosbias\n",
      "0.32142857142857145\n",
      "very_scrubbed\n",
      "0.7142857142857143\n",
      "debiased_tolga\n",
      "0.2857142857142857\n",
      "strong_debiased_1\n",
      "0.6428571428571429\n",
      "strong_no_projection\n",
      "0.6071428571428571\n",
      "strong_debiased_2\n",
      "0.7142857142857143\n",
      "strong_no_equalize\n",
      "0.39285714285714285\n",
      "glove\n",
      "1.0\n",
      "strong_debiased_4\n",
      "0.6071428571428571\n",
      "strong_debiased_3\n",
      "0.6071428571428571\n",
      "scrubbed\n",
      "0.8571428571428571\n"
     ]
    }
   ],
   "source": [
    "for _model in MODEL_NAMES.values():\n",
    "    print(_model)\n",
    "    print(compute_fraction_improved(gender_gap_df, 'glove_tpr_gender_gap', '{}_tpr_gender_gap'.format(_model)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "tpr_cols = ['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\n",
    "tnr_cols = ['{}_tnr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\n",
    "gender_gap_cols = tpr_cols + tnr_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "debiased_biosbias_tpr_gender_gap       0.029446\n",
       "very_scrubbed_tpr_gender_gap           0.003786\n",
       "debiased_tolga_tpr_gender_gap          0.028584\n",
       "strong_debiased_1_tpr_gender_gap       0.014313\n",
       "strong_no_projection_tpr_gender_gap    0.015602\n",
       "strong_debiased_2_tpr_gender_gap       0.016134\n",
       "strong_no_equalize_tpr_gender_gap      0.025152\n",
       "glove_tpr_gender_gap                   0.022636\n",
       "strong_debiased_4_tpr_gender_gap       0.016461\n",
       "strong_debiased_3_tpr_gender_gap       0.014632\n",
       "scrubbed_tpr_gender_gap                0.000189\n",
       "debiased_biosbias_tnr_gender_gap       0.000011\n",
       "very_scrubbed_tnr_gender_gap           0.000001\n",
       "debiased_tolga_tnr_gender_gap          0.000009\n",
       "strong_debiased_1_tnr_gender_gap       0.000009\n",
       "strong_no_projection_tnr_gender_gap    0.000006\n",
       "strong_debiased_2_tnr_gender_gap       0.000006\n",
       "strong_no_equalize_tnr_gender_gap      0.000006\n",
       "glove_tnr_gender_gap                   0.000008\n",
       "strong_debiased_4_tnr_gender_gap       0.000003\n",
       "strong_debiased_3_tnr_gender_gap       0.000004\n",
       "scrubbed_tnr_gender_gap                0.000001\n",
       "dtype: float64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gender_gap_df[gender_gap_cols].apply(lambda x: np.mean(x**2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "debiased_biosbias_tpr_gender_gap       0.119049\n",
       "very_scrubbed_tpr_gender_gap           0.041268\n",
       "debiased_tolga_tpr_gender_gap          0.114932\n",
       "strong_debiased_1_tpr_gender_gap       0.075670\n",
       "strong_no_projection_tpr_gender_gap    0.079293\n",
       "strong_debiased_2_tpr_gender_gap       0.075149\n",
       "strong_no_equalize_tpr_gender_gap      0.102661\n",
       "glove_tpr_gender_gap                   0.096764\n",
       "strong_debiased_4_tpr_gender_gap       0.083171\n",
       "strong_debiased_3_tpr_gender_gap       0.070882\n",
       "scrubbed_tpr_gender_gap                0.007773\n",
       "debiased_biosbias_tnr_gender_gap       0.002204\n",
       "very_scrubbed_tnr_gender_gap           0.000958\n",
       "debiased_tolga_tnr_gender_gap          0.002066\n",
       "strong_debiased_1_tnr_gender_gap       0.001811\n",
       "strong_no_projection_tnr_gender_gap    0.001657\n",
       "strong_debiased_2_tnr_gender_gap       0.001537\n",
       "strong_no_equalize_tnr_gender_gap      0.001866\n",
       "glove_tnr_gender_gap                   0.001997\n",
       "strong_debiased_4_tnr_gender_gap       0.001316\n",
       "strong_debiased_3_tnr_gender_gap       0.001376\n",
       "scrubbed_tnr_gender_gap                0.000747\n",
       "dtype: float64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gender_gap_df[gender_gap_cols].apply(lambda x: np.mean(np.abs(x)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_tpr_gap(df, _model):\n",
    "    \"\"\"Plot a model's TPR gender gap against 'frac_female' with a regression fit.\n",
    "\n",
    "    Each point is annotated with the row's 'label_profession' value.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame with 'frac_female', 'label_profession' and\n",
    "            '<_model>_tpr_gender_gap' columns.\n",
    "        _model: model name used to build the gap column name and the plot title.\n",
    "    \"\"\"\n",
    "    fig, ax = plt.subplots(figsize=(15, 6))\n",
    "    x = 'frac_female'\n",
    "    y = '{}_tpr_gender_gap'.format(_model)\n",
    "    p1 = sns.regplot(x = x, y = y, data = df)\n",
    "    p1.set(xlabel = \"% Female\", ylabel = \"TPR Gender Gap\", title = _model)\n",
    "\n",
    "    # Read label positions from `df` itself (not the global `results_df`) and\n",
    "    # iterate row-wise so labels stay aligned even if the index is not 0..n-1.\n",
    "    for x_val, y_val, label in zip(df[x], df[y], df['label_profession']):\n",
    "        p1.text(x_val + 0.01, y_val, label, horizontalalignment='left', size='medium', color='black')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _model in MODEL_NAMES.values():\n",
    "    if 'untuned' in _model:\n",
    "        plot_tpr_gap(results_df, _model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>frac_female</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>frac_female</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>debiased_biosbias_tpr_gender_gap</th>\n",
       "      <td>0.829982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>very_scrubbed_tpr_gender_gap</th>\n",
       "      <td>0.458378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>debiased_tolga_tpr_gender_gap</th>\n",
       "      <td>0.824882</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_1_tpr_gender_gap</th>\n",
       "      <td>0.716922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_no_projection_tpr_gender_gap</th>\n",
       "      <td>0.709000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_2_tpr_gender_gap</th>\n",
       "      <td>0.596896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_no_equalize_tpr_gender_gap</th>\n",
       "      <td>0.772645</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>glove_tpr_gender_gap</th>\n",
       "      <td>0.794059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_4_tpr_gender_gap</th>\n",
       "      <td>0.550435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>strong_debiased_3_tpr_gender_gap</th>\n",
       "      <td>0.707174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scrubbed_tpr_gender_gap</th>\n",
       "      <td>-0.282919</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     frac_female\n",
       "frac_female                             1.000000\n",
       "debiased_biosbias_tpr_gender_gap        0.829982\n",
       "very_scrubbed_tpr_gender_gap            0.458378\n",
       "debiased_tolga_tpr_gender_gap           0.824882\n",
       "strong_debiased_1_tpr_gender_gap        0.716922\n",
       "strong_no_projection_tpr_gender_gap     0.709000\n",
       "strong_debiased_2_tpr_gender_gap        0.596896\n",
       "strong_no_equalize_tpr_gender_gap       0.772645\n",
       "glove_tpr_gender_gap                    0.794059\n",
       "strong_debiased_4_tpr_gender_gap        0.550435\n",
       "strong_debiased_3_tpr_gender_gap        0.707174\n",
       "scrubbed_tpr_gender_gap                -0.282919"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results_df[['frac_female']+['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]].corr()[['frac_female']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gender Prediction Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Which model does this correspond to?\n",
    "model_name = 'tf_gru_attention_multiclass_gender_biosbias_glove:v_20190405_142640'\n",
    "gender_df['correct'] = ((gender_df['gender'] == 'M') == gender_df[model_name])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.8423\n"
     ]
    }
   ],
   "source": [
    "acc = gender_df.correct.sum()/gender_df.correct.count()\n",
    "print('Accuracy: {:.4f}'.format(acc))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "models_eval_py2",
   "language": "python",
   "name": "models_eval_py2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: model_evaluation/Predict bias.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.contrib.framework.python.framework import checkpoint_utils\n",
    "\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from sklearn.preprocessing import normalize\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_DIR_OCCUPATION = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103117/model_dir/model.ckpt-100000'\n",
    "MODEL_DIR_GENDER = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_warmstart_biosbias_glove/20190404_151521/model_dir/model.ckpt-191000'\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extract two matrices."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('dense/bias', [1])\n",
      "('dense/bias/Adam', [1])\n",
      "('dense/bias/Adam_1', [1])\n",
      "('dense/kernel', [256, 1])\n",
      "('dense/kernel/Adam', [256, 1])\n",
      "('dense/kernel/Adam_1', [256, 1])\n",
      "('dense_1/bias', [128])\n",
      "('dense_1/bias/Adam', [128])\n",
      "('dense_1/bias/Adam_1', [128])\n",
      "('dense_1/kernel', [256, 128])\n",
      "('dense_1/kernel/Adam', [256, 128])\n",
      "('dense_1/kernel/Adam_1', [256, 128])\n",
      "('dense_2/bias', [33])\n",
      "('dense_2/bias/Adam', [33])\n",
      "('dense_2/bias/Adam_1', [33])\n",
      "('dense_2/kernel', [128, 33])\n",
      "('dense_2/kernel/Adam', [128, 33])\n",
      "('dense_2/kernel/Adam_1', [128, 33])\n",
      "('embeddings', [400002, 100])\n",
      "('global_step', [])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam', [256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam_1', [256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam', [356, 256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam_1', [356, 256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam', [512])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam_1', [512])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam', [356, 512])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam_1', [356, 512])\n",
      "('signal_early_stopping/STOP', [])\n",
      "('title/beta1_power', [])\n",
      "('title/beta2_power', [])\n"
     ]
    }
   ],
   "source": [
    "var_list = checkpoint_utils.list_variables(MODEL_DIR_OCCUPATION)\n",
    "for v in var_list:\n",
    "    print(v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "kernel_occupation = np.transpose(checkpoint_utils.load_variable(MODEL_DIR_OCCUPATION, 'dense_2/kernel'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('beta1_power', [])\n",
      "('beta2_power', [])\n",
      "('dense/bias', [1])\n",
      "('dense/kernel', [256, 1])\n",
      "('dense_1/bias', [128])\n",
      "('dense_1/kernel', [256, 128])\n",
      "('embeddings', [400002, 100])\n",
      "('final_layer/bias', [33])\n",
      "('final_layer/bias/Adam', [33])\n",
      "('final_layer/bias/Adam_1', [33])\n",
      "('final_layer/kernel', [128, 33])\n",
      "('final_layer/kernel/Adam', [128, 33])\n",
      "('final_layer/kernel/Adam_1', [128, 33])\n",
      "('global_step', [])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])\n",
      "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])\n",
      "('signal_early_stopping/STOP', [])\n"
     ]
    }
   ],
   "source": [
    "var_list = checkpoint_utils.list_variables(MODEL_DIR_GENDER)\n",
    "for v in var_list:\n",
    "    print(v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "kernel_gender = np.transpose(checkpoint_utils.load_variable(MODEL_DIR_GENDER, 'final_layer/kernel'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute cosine."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "TITLE_LABELS = [\n",
    "    'accountant', 'acupuncturist', 'architect', 'attorney', 'chiropractor', 'comedian', 'composer', 'dentist',\n",
    "    'dietitian', 'dj', 'filmmaker', 'interior_designer', 'journalist', 'landscape_architect', 'magician',\n",
    "    'massage_therapist', 'model', 'nurse', 'painter', 'paralegal', 'pastor', 'personal_trainer',\n",
    "    'photographer', 'physician', 'poet', 'professor', 'psychologist', 'rapper',\n",
    "    'real_estate_broker', 'software_engineer', 'surgeon', 'teacher', 'yoga_teacher']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(33, 128)"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kernel_gender.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "kernel_gender_female = normalize(kernel_gender[0].reshape(1, -1))\n",
    "kernel_gender_male = normalize(kernel_gender[1].reshape(1, -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.01618018,  0.1003583 , -0.0723118 ,  0.06453013,  0.22758739,\n",
       "         0.06790616,  0.08027411,  0.10015733, -0.05590729,  0.023418  ,\n",
       "         0.06255525, -0.02604564,  0.09049062, -0.01601316,  0.08945937,\n",
       "        -0.11582728,  0.06244883,  0.07855629,  0.01956639, -0.06774757,\n",
       "         0.00614625, -0.03594974,  0.0652191 , -0.05078628, -0.00807877,\n",
       "         0.06896302,  0.11013658, -0.04664179,  0.11593511,  0.17774113,\n",
       "         0.09496382,  0.12176205,  0.04098931, -0.0970282 ,  0.02898299,\n",
       "         0.10654851, -0.13562816,  0.03486229,  0.12194955,  0.02276845,\n",
       "         0.04589143, -0.06606348, -0.00129113, -0.07973252, -0.02630814,\n",
       "        -0.09769032, -0.1640446 , -0.07602697,  0.00429134,  0.06098389,\n",
       "         0.02934178, -0.07209212, -0.11304612,  0.29547158, -0.04287611,\n",
       "        -0.04518875, -0.02993831,  0.06304532,  0.07989506, -0.09601919,\n",
       "         0.20816126, -0.1977993 ,  0.15119584,  0.01456547,  0.06435941,\n",
       "        -0.07794361, -0.00554093,  0.05497926,  0.0931736 ,  0.22706528,\n",
       "        -0.08019326, -0.0819607 ,  0.04490028, -0.01723337,  0.04124108,\n",
       "         0.13199665, -0.01417105,  0.0725795 , -0.05172402, -0.13563272,\n",
       "        -0.07302421,  0.24843292,  0.14667384, -0.02692026,  0.15892392,\n",
       "         0.02655477, -0.00804625,  0.00184608,  0.02203059,  0.00078905,\n",
       "         0.0115315 ,  0.00199543,  0.05942026,  0.07089076, -0.04697848,\n",
       "        -0.01500242, -0.02432874, -0.02453819, -0.13443194, -0.00370577,\n",
       "        -0.03219581, -0.07874984, -0.05446392,  0.05492223, -0.11461313,\n",
       "        -0.00379655,  0.01339969, -0.01030909,  0.0601744 ,  0.00417376,\n",
       "        -0.02308951, -0.1329045 , -0.00130105,  0.0959954 ,  0.03397062,\n",
       "         0.11269465,  0.00561908,  0.00870924,  0.0339431 ,  0.01517005,\n",
       "        -0.05439634, -0.02544309, -0.13284749,  0.04113958,  0.03033615,\n",
       "        -0.08890872, -0.09986325, -0.09274729]], dtype=float32)"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kernel_gender_male"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "kernel_gender_mean = normalize((kernel_gender_female + kernel_gender_male) / 2)\n",
    "direction_male = kernel_gender_male - np.sum(np.multiply(kernel_gender_male, kernel_gender_mean))* kernel_gender_mean\n",
    "direction_female = kernel_gender_female - np.sum(np.multiply(kernel_gender_female, kernel_gender_mean))* kernel_gender_mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.]], dtype=float32)"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cosine_similarity(direction_female, direction_male)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute mean\n",
    "kernel_occupation_mean = np.mean(kernel_occupation, axis=0)\n",
    "# Apply  x - np.sum(np.multiply(x, mean))* mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accountant:-0.166758477688\n",
      "acupuncturist:0.0150433778763\n",
      "architect:-0.106728702784\n",
      "attorney:-0.0355984941125\n",
      "chiropractor:-0.112065583467\n",
      "comedian:-0.17996160686\n",
      "composer:-0.154989466071\n",
      "dentist:-0.00389941781759\n",
      "dietitian:0.00302037596703\n",
      "dj:-0.156128510833\n",
      "filmmaker:-0.116180986166\n",
      "interior_designer:-0.00478803366423\n",
      "journalist:-0.0217301938683\n",
      "landscape_architect:-0.00763043016195\n",
      "magician:-0.00733107328415\n",
      "massage_therapist:-0.0116159021854\n",
      "model:0.0549785941839\n",
      "nurse:0.099561393261\n",
      "painter:0.0174702480435\n",
      "paralegal:0.0106164813042\n",
      "pastor:-0.161623597145\n",
      "personal_trainer:-0.133440434933\n",
      "photographer:-0.0985902026296\n",
      "physician:-0.00131351128221\n",
      "poet:-0.061441861093\n",
      "professor:0.00782079994678\n",
      "psychologist:0.00208866596222\n",
      "rapper:-0.112389668822\n",
      "real_estate_broker:-0.000683411955833\n",
      "software_engineer:-0.0237298682332\n",
      "surgeon:-0.0968104675412\n",
      "teacher:-0.0625882595778\n",
      "yoga_teacher:0.0292760580778\n"
     ]
    }
   ],
   "source": [
    "for i in range(33):\n",
    "#     _bias = np.abs(cosine_similarity(kernel_gender_female, kernel_occupation[i].reshape(1, -1))) + \\\n",
    "#                   np.abs(cosine_similarity(kernel_gender_male, kernel_occupation[i].reshape(1, -1)))\n",
    "\n",
    "    _bias = (cosine_similarity(kernel_gender_female - kernel_gender_male, kernel_occupation[i].reshape(1, -1)))\n",
    "\n",
    "#     _bias = cosine_similarity(direction_male, kernel_occupation[i].reshape(1, -1))\n",
    "\n",
    "#     x = kernel_occupation[i].reshape(1, -1)\n",
    "#     x = x - np.sum(np.multiply(x, kernel_occupation_mean))* kernel_occupation_mean\n",
    "#     _bias = cosine_similarity(direction_male, x)\n",
    "\n",
    "    print ('{}:{}'.format(TITLE_LABELS[i], float(_bias)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14+"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: model_evaluation/README.md
================================================
# Evaluation Pipeline for Text classification models.

This directory contains utilities to use a model deployed on cloud MLE (in 'utils_export/'), and some notebooks to illustrate a typical evaluation pipeline.

## Environment Setup

### Python Dependencies

Install library dependencies (it is optional, but recommended, to install these
in a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html)):

```shell
# The python2 way to create and use virtual environment
# (optional, but recommended):
virtualenv .pyenv
source .pyenv/bin/activate
# Install dependencies
pip install -r requirements.txt

jupyter notebook

# ... do stuff ...

# Exit your virtual environment.
deactivate
```

### Google Cloud Storage dependencies

If you need to access data located in Google Cloud Storage, you must [install the Google Cloud SDK](https://cloud.google.com/sdk/docs/) and initialize it within your virtual environment.

## Evaluating one model

The notebook `jigsaw_evaluation_pipeline.ipynb` provides an example of computing evaluation metrics for the ml-fairness project.

We use the `Dataset` and `Model` utilities from `utils_export/` to interact with the models deployed on CMLE and execute the following steps:
 * Load two datasets: 1 dataset to evaluate performance (or intended bias) similar to the training data, and 1 dataset to evaluate the unintended bias that includes identity information.
 * Run the model on each dataset and collect the predictions.
 * Compute evaluation metrics: AUC on the first dataset, pinned_auc on the second one.


## Evaluating several models

This is useful to compare different training runs (with different parameters) but also to compare the evaluation metrics during the training run (several models exported during 1 training run).

TODO(fprost): Write description once the notebook is pushed


## Cloud MLE utilities

The utility library `utils_export/` intends to simplify the use of CMLE deployed models.

### Typical usage pattern

This library will handle the following "overhead" tasks:
 * Convert your pandas `DataFrame` into tf-records, adding an `example_key` to each example.
 * Send an HTTP request to CMLE to run a batch prediction job.
 * Wait for job completion.
 * Parse prediction files and join results with the initial `DataFrame` based on `example_key`.


```python
input_fn = ...  # A function that returns a pandas DataFrame.
dataset = Dataset(input_fn, dataset_dir)

dataset.load_data(10000)

model = Model(...)
dataset.add_model_prediction_to_data(model)
# OR, to reuse predictions that were already computed:
dataset.add_model_prediction_to_data(model, recompute_predictions=False)

dataset.show_data()
```

### `Model`

A `Model` instance describes the key components of a CMLE model.

Key parameters are:
 * how to access the model: project_name, model_names.
 * what the expected inputs to the models are and their respective types (see EncodingFeatureSpec). The types are important to find the right encoding function for TF-records.
 * what the model outputs are.

Example:
```python
model = Model(
    feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},
    prediction_keys='prediction_key',
    model_names=['model_name1:version1', 'model_name1:version2', 'model_name2:version1'],
    project_name='wikidetox')
```


### `Dataset`

A `Dataset` instance is related to a pandas `DataFrame` and will be progressively augmented with the model predictions.

The dataset attributes are:
 * `input_fn`: a function that returns a `DataFrame` (input_data).
 * `DATASET_DIR`: where to save/load all the files associated with the `Dataset`, in particular input_tf_records and cloud mle predictions.


================================================
FILE: model_evaluation/deploy_models.sh
================================================
#!/bin/bash
#
# Runs utils_export/deploy_list_models.py on a fixed set of model directories.
#
# MODEL_DIRS is a single comma-separated string of GCS `model_dir` paths. It is
# assembled from adjacent single-quoted fragments joined by line continuations,
# so no whitespace ends up inside the value.
#
# NOTE(review): presumably each directory is deployed under the model name
# passed via --model_name; confirm against deploy_list_models.py.

MODEL_DIRS='gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103329/model_dir,'\
'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103300/model_dir,'\
'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103254/model_dir,'\
'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103245/model_dir,'\
'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103232/model_dir,'\
'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103209/model_dir,'\
'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103152/model_dir,'\
'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103117/model_dir'


# Quote the expansion so the list always reaches the script as one argument,
# even if a path is later edited to contain whitespace or glob characters.
python utils_export/deploy_list_models.py --list_model_dir="$MODEL_DIRS" --model_name 'tf_test_fprost'


================================================
FILE: model_evaluation/few_shot_learning_baseline_evaluation.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import getpass\n",
    "from IPython.display import display\n",
    "import json\n",
    "import nltk\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pkg_resources\n",
    "import os\n",
    "import random\n",
    "import re\n",
    "import seaborn as sns\n",
    "import sklearn.metrics as metrics\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow.python.lib.io import file_io"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils_export.dataset import Dataset, Model\n",
    "from utils_export import utils_cloudml\n",
    "from utils_export import utils_tfrecords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Faster to access GCS file:\n",
    "# https://github.com/tensorflow/tensorflow/issues/15530\n",
    "os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to /Users/msushkov/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nltk.download('punkt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tokenizer(text, lowercase=True):\n",
    "  \"\"\"Converts text to a list of words.\n",
    "\n",
    "  Args:\n",
    "    text: piece of text to tokenize (string).\n",
    "    lowercase: whether to include lowercasing in preprocessing (bool).\n",
    "\n",
    "  Returns:\n",
    "    A list of strings (words).\n",
    "  \"\"\"\n",
    "  words = nltk.word_tokenize(text.decode('utf-8'))\n",
    "  if lowercase:\n",
    "    words = [w.lower() for w in words]\n",
    "  return words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_test_input_fn(dataset_path,\n",
    "                       model_text_feature,\n",
    "                       dataset_text_feature,\n",
    "                       data_label,\n",
    "                       tokenizer_fn,\n",
    "                       label_data_type=tf.float32,\n",
    "                       max_n_examples=None,\n",
    "                       random_filter_keep_rate=1.0):\n",
    "    \"\"\"Returns a test input function.\n",
    "    \n",
    "    Args:\n",
    "      dataset_path (str): Path to dataset.\n",
    "      model_text_feature (str): The feature column corresponding to the\n",
    "        text input the model expects.\n",
    "      dataset_text_feature (str): The name of the text feature of the dataset.\n",
    "      data_label (str): The output label for the dataset.\n",
    "      tokenizer_fn: Tokenizer function (str -> list).\n",
    "      max_n_examples (int): How many examples to evaluate on.\n",
    "      random_filter_keep_rate (float): Filter out test examples with this probability.\n",
    "\n",
    "    Returns:\n",
    "      Test input function.\n",
    "    \"\"\"\n",
    "    decoding_input_features = {\n",
    "      dataset_text_feature: tf.FixedLenFeature([], dtype=tf.string),\n",
    "      data_label: tf.FixedLenFeature([], dtype=label_data_type)\n",
    "    }\n",
    "\n",
    "    def test_input_fn(max_n_examples=max_n_examples,\n",
    "                      random_filter_keep_rate=random_filter_keep_rate):\n",
    "        \"\"\"Test input function.\n",
    "        \n",
    "        Args:\n",
    "          max_n_examples (int): How many examples to evaluate on.\n",
    "          random_filter_keep_rate (float): Filter out test examples with this probability.\n",
    "          \n",
    "        Returns:\n",
    "          DataFrame with the results.\n",
    "        \"\"\"\n",
    "        res = utils_tfrecords.decode_tf_records_to_pandas(\n",
    "            decoding_input_features,\n",
    "            dataset_path,\n",
    "            max_n_examples,\n",
    "            random_filter_keep_rate)\n",
    "        if not tokenizer_fn:\n",
    "            tok = lambda x: [x]\n",
    "            res[model_text_feature] = list(map(tok, res[dataset_text_feature]))\n",
    "        else:\n",
    "            res[model_text_feature] = list(map(tokenizer_fn, res[dataset_text_feature]))\n",
    "        res = res.rename(columns={ data_label: 'label' })\n",
    "        res['label'] = list(map(lambda x: bool(round(x)), list(res['label'])))\n",
    "        final = res.copy(deep=True)\n",
    "        return final\n",
    "\n",
    "    return test_input_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_results(results_df, model_names, print_pr_curve=False):\n",
    "    \"\"\"Print the classification results.\n",
    "    \n",
    "    Args:\n",
    "      results_df: DataFrame with the results.\n",
    "      model_names: List of strings representing the models for which we have results.\n",
    "    \"\"\"\n",
    "    labels = results_df['label']\n",
    "    for _model in model_names:\n",
    "        print(_model)\n",
    "        model_preds = results_df[_model]\n",
    "        fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)\n",
    "        roc_auc = metrics.auc(fpr, tpr)\n",
    "        recalls, precisions, thr = metrics.precision_recall_curve(labels, model_preds)\n",
    "        pr_auc = metrics.auc(precisions, recalls)\n",
    "        model_preds_binary = (model_preds > 0.5).astype(np.int_)\n",
    "        f1 = metrics.f1_score(labels, model_preds_binary)\n",
    "        print('\\tROC AUC: {}'.format(roc_auc))\n",
    "        print('\\tPR AUC: {}'.format(pr_auc))\n",
    "        print('\\tF1: {}'.format(f1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "PROJECT_NAME = 'conversationai-models'\n",
    "SENTENCE_KEY = 'comment_key' #Input key\n",
    "\n",
    "# Pattern for path of tf_records\n",
    "OUTPUT_DIR_BASE = os.path.join(\n",
    "    'gs://conversationai-models',\n",
    "    getpass.getuser(),\n",
    "    'tfrecords')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate models on Civil Comments dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [],
   "source": [
    "LABEL_NAME_PREDICTION_MODEL = 'toxicity/logistic'\n",
    "DATASET = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/test-*.tfrecord'\n",
    "DATA_LABEL = 'toxicity'\n",
    "DATASET_TEXT_FEATURE='comment_text'\n",
    "\n",
    "# Pattern for path of tf_records\n",
    "OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'civil_comments_test')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CNN, GRU Attention Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'tokens'\n",
    "MODEL_NAMES = [\n",
    "    'tf_cnn_civil_comments_glove:v_20190219_185541',\n",
    "    'tf_gru_attention_civil_comments_glove:v_20190219_185619',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n",
    "test_dataset.load_data(10000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "civil_comments_test_df = test_dataset.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_cnn_civil_comments_glove:v_20190219_185541\n",
      "\tROC AUC: 0.9573435242534393\n",
      "\tPR AUC: 0.6729934425219886\n",
      "tf_gru_attention_civil_comments_glove:v_20190219_185619\n",
      "\tROC AUC: 0.9649161132104584\n",
      "\tPR AUC: 0.7486011745102973\n"
     ]
    }
   ],
   "source": [
    "print_results(civil_comments_test_df, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TF-Hub Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'text'\n",
    "MODEL_NAMES = [\n",
    "    'tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n",
    "test_dataset.load_data(10000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {},
   "outputs": [],
   "source": [
    "civil_comments_hub_df = test_dataset.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552\n",
      "\tROC AUC: 0.9595451744696132\n",
      "\tPR AUC: 0.7429338592289392\n"
     ]
    }
   ],
   "source": [
    "print_results(civil_comments_hub_df, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate models on Toxicity dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {},
   "outputs": [],
   "source": [
    "LABEL_NAME_PREDICTION_MODEL = 'frac_neg/logistic'\n",
    "DATASET = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'\n",
    "DATA_LABEL = 'frac_neg'\n",
    "DATASET_TEXT_FEATURE='comment_text'\n",
    "\n",
    "# Pattern for path of tf_records\n",
    "OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'toxicity_test')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CNN, GRU Attention Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'tokens'\n",
    "MODEL_NAMES = [\n",
    "    'tf_cnn_toxicity_glove:v_20190219_185532',\n",
    "    'tf_gru_attention_toxicity_glove:v_20190219_185516',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n",
    "test_dataset.load_data(10000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [],
   "source": [
    "toxicity_test_df1 = test_dataset.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_cnn_toxicity_glove:v_20190219_185532\n",
      "\tROC AUC: 0.951760553925346\n",
      "\tPR AUC: 0.8740274773143215\n",
      "tf_gru_attention_toxicity_glove:v_20190219_185516\n",
      "\tROC AUC: 0.9543916575133977\n",
      "\tPR AUC: 0.8814208812923074\n"
     ]
    }
   ],
   "source": [
    "print_results(toxicity_test_df1, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TF-Hub Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'text'\n",
    "MODEL_NAMES = [\n",
    "    'tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n",
    "test_dataset.load_data(10000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {},
   "outputs": [],
   "source": [
    "toxicity_test_df2 = test_dataset.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427\n",
      "\tROC AUC: 0.9270843170934745\n",
      "\tPR AUC: 0.8155815559085313\n"
     ]
    }
   ],
   "source": [
    "print_results(toxicity_test_df2, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate models on Many Communities dataset (full)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "LABEL_NAME_PREDICTION_MODEL = 'removed/logistic'\n",
    "DATASET = 'gs://conversationai-models/resources/transfer_learning_data/many_communities/20181105_answers_all_columns_nthain.tfrecord'\n",
    "DATA_LABEL = 'removed'\n",
    "DATASET_TEXT_FEATURE='comment_text'\n",
    "\n",
    "# Pattern for path of tf_records\n",
    "OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'many_communities_test')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CNN, GRU Attention Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'tokens'\n",
    "MODEL_NAMES = [\n",
    "    'tf_cnn_many_communities_glove:v_20190219_185551_gpu_p100_4',\n",
    "    #'tf_gru_attention_many_communities:v20190322_142800_507893_1556085643',\n",
    "    #'tf_gru_attention_many_communities:v20190315_161037_23271_1555129264',\n",
    "    'tf_gru_attention_many_communities:v20190705_004839_507000_1562364428_gpu_p100_4',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, tokenizer, label_data_type=tf.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n",
    "test_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {},
   "outputs": [],
   "source": [
    "many_communities_test_df = test_dataset.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 319,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_cnn_many_communities_glove:v_20190219_185551\n",
      "\tROC AUC: 0.7476941464055139\n",
      "\tPR AUC: 0.07604839414024091\n",
      "tf_gru_attention_many_communities:v20190315_161037_23271_1555129264\n",
      "\tROC AUC: 0.7215269560475308\n",
      "\tPR AUC: 0.06656538517176142\n"
     ]
    }
   ],
   "source": [
    "print_results(many_communities_test_df, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TF-Hub Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'text'\n",
    "MODEL_NAMES = [\n",
    "    'tf_hub_classifier_many_communities:v20190219_185602_316000_1553563221_gpu_v100_4',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, None, label_data_type=tf.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n",
    "test_dataset.load_data(10000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "many_communities_tfhub_test_df = test_dataset.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print_results(many_communities_tfhub_test_df, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate models on Many Communities subset (adapted for few-shot learning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "LABEL_NAME_PREDICTION_MODEL = 'label/logistic'\n",
    "DATASET_VALID = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord'\n",
    "DATASET_TEST = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord'\n",
    "DATA_LABEL = 'label'\n",
    "DATASET_TEXT_FEATURE='text'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pessimistic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pattern for path of tf_records\n",
    "OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/valid')\n",
    "OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/test')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### CNN, GRU Attention Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'tokens'\n",
    "MODEL_NAMES = [\n",
    "    'tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1',\n",
    "    'tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "valid_input_fn = make_test_input_fn(\n",
    "    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, tokenizer, label_data_type=tf.int64)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, tokenizer, label_data_type=tf.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n",
    "valid_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n",
    "test_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1\n",
      "\tROC AUC: 0.8233381391772395\n",
      "\tPR AUC: 0.8062951511107903\n",
      "\tF1: 0.7607565011820331\n",
      "tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1\n",
      "\tROC AUC: 0.8303615196078432\n",
      "\tPR AUC: 0.8125045070656154\n",
      "\tF1: 0.7703703703703705\n"
     ]
    }
   ],
   "source": [
    "print_results(valid_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1\n",
      "\tROC AUC: 0.7981477681641835\n",
      "\tPR AUC: 0.7900106468171257\n",
      "\tF1: 0.7378091872791519\n",
      "tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1\n",
      "\tROC AUC: 0.8074846866462235\n",
      "\tPR AUC: 0.7951370231895221\n",
      "\tF1: 0.7507100720996286\n"
     ]
    }
   ],
   "source": [
    "print_results(test_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### TF-Hub Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'text'\n",
    "MODEL_NAMES = [\n",
    "    'tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "valid_input_fn = make_test_input_fn(\n",
    "    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, None, label_data_type=tf.int64)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, None, label_data_type=tf.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n",
    "valid_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n",
    "test_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1\n",
      "\tROC AUC: 0.8612435121107267\n",
      "\tPR AUC: 0.851153195076283\n",
      "\tF1: 0.7937575030012005\n"
     ]
    }
   ],
   "source": [
    "print_results(valid_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1\n",
      "\tROC AUC: 0.8434673869262717\n",
      "\tPR AUC: 0.8326080326940988\n",
      "\tF1: 0.779380468195791\n"
     ]
    }
   ],
   "source": [
    "print_results(test_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Optimistic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pattern for path of tf_records\n",
    "OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/valid')\n",
    "OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/test')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### CNN, GRU Attention Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'tokens'\n",
    "MODEL_NAMES = [\n",
    "    'tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1',\n",
    "    'tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "valid_input_fn = make_test_input_fn(\n",
    "    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, tokenizer, label_data_type=tf.int64)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, tokenizer, label_data_type=tf.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n",
    "valid_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n",
    "test_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1\n",
      "\tROC AUC: 0.8304709727028066\n",
      "\tPR AUC: 0.8191225889787218\n",
      "\tF1: 0.7564259485924112\n",
      "tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1\n",
      "\tROC AUC: 0.8293254998077663\n",
      "\tPR AUC: 0.8181913933482414\n",
      "\tF1: 0.7652214022140222\n"
     ]
    }
   ],
   "source": [
    "print_results(valid_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1\n",
      "\tROC AUC: 0.8043942295635125\n",
      "\tPR AUC: 0.79754755517453\n",
      "\tF1: 0.7305737109658679\n",
      "tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1\n",
      "\tROC AUC: 0.8156875904836816\n",
      "\tPR AUC: 0.8081941065311745\n",
      "\tF1: 0.7558876811594204\n"
     ]
    }
   ],
   "source": [
    "print_results(test_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### TF-Hub Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_TEXT_FEATURE = 'text'\n",
    "MODEL_NAMES = [\n",
    "    'tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1',\n",
    "]\n",
    "\n",
    "model_input_spec = {\n",
    "    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n",
    "}\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)\n",
    "\n",
    "valid_input_fn = make_test_input_fn(\n",
    "    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, None, label_data_type=tf.int64)\n",
    "\n",
    "test_input_fn = make_test_input_fn(\n",
    "    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n",
    "    DATA_LABEL, None, label_data_type=tf.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n",
    "valid_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Need to set seed before loading data to be able to reload same data in the future\n",
    "random.seed(2018)\n",
    "\n",
    "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n",
    "test_dataset.load_data(100000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n",
    "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1\n",
      "\tROC AUC: 0.8680750192233757\n",
      "\tPR AUC: 0.8623373414090059\n",
      "\tF1: 0.7900994904149479\n"
     ]
    }
   ],
   "source": [
    "print_results(valid_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1\n",
      "\tROC AUC: 0.8526337876041631\n",
      "\tPR AUC: 0.8481017558154519\n",
      "\tF1: 0.784984556901877\n"
     ]
    }
   ],
   "source": [
    "print_results(test_dataset.show_data(), MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Finetuned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.utils import fixes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_list_results_files(parent_dir):\n",
    "    \"\"\"Gets the paths of all results files that are in parent_dir.\"\"\"\n",
    "    file_list = []\n",
    "    for subdirectory, _, files in tf.gfile.Walk(parent_dir):\n",
    "        [file_list.append(os.path.join(parent_dir, fname)) for fname in files]\n",
    "    return file_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_csv_predictions(pred_file, is_test=False):\n",
    "    \"\"\"Load the CSV file with predictions and labels.\"\"\"\n",
    "    model_predictions = None\n",
    "    labels = None\n",
    "    communities = None\n",
    "    names = ['label', 'pred', 'community']\n",
    "    if is_test:\n",
    "        names = ['community', 'label', 'pred']\n",
    "    with file_io.FileIO(pred_file, 'r') as f:\n",
    "        df = pd.read_csv(f, header=None, names=names)\n",
    "        labels = df['label'].values\n",
    "        model_predictions = df['pred'].values\n",
    "        communities = df['community'].values\n",
    "    return labels, model_predictions, communities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_pr_curve(precisions, recalls, identifier=None):\n",
    "    \"\"\"Plots the Precision/Recall curve.\n",
    "    Args:\n",
    "      precisions: Precisions at all score thresholds.\n",
    "      recalls: Recalls at all score thresholds.\n",
    "      identifier: Optional string indicating what this curve is.\n",
    "    \"\"\"\n",
    "    precision_recall_auc = metrics.auc(recalls, precisions)\n",
    "    plt.figure()\n",
    "    step_kwargs = ({\n",
    "        'step': 'post'\n",
    "    } if 'step' in fixes.signature(plt.fill_between).parameters else {})\n",
    "    plt.step(recalls, precisions, color='b', alpha=0.2, where='post')\n",
    "    plt.fill_between(recalls, precisions, alpha=0.2, color='b', **step_kwargs)\n",
    "    plt.xlabel('Recall')\n",
    "    plt.ylabel('Precision')\n",
    "    plt.ylim([0.0, 1.05])\n",
    "    plt.xlim([0.0, 1])\n",
    "    if identifier:\n",
    "        plt.title('PR curve for %s (AUC = %.2f).' % (\n",
    "            identifier, precision_recall_auc))\n",
    "    else:\n",
    "        plt.title('PR curve (AUC = %.2f).' % precision_recall_auc)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_metrics_from_dir(results_dir, is_test=False):\n",
    "    files = get_list_results_files(results_dir)\n",
    "    for file_path in files:\n",
    "        curr_trial_name = os.path.basename(file_path)\n",
    "        print(curr_trial_name)\n",
    "        labels, model_preds, communities = load_csv_predictions(file_path, is_test)\n",
    "        fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)\n",
    "        roc_auc = metrics.auc(fpr, tpr)\n",
    "        precisions, recalls, thr = metrics.precision_recall_curve(labels, model_preds)\n",
    "        pr_auc = metrics.auc(recalls, precisions)\n",
    "        model_preds_binary = (model_preds > 0.5).astype(np.int_)\n",
    "        f1 = metrics.f1_score(labels, model_preds_binary)\n",
    "        print('\\tROC AUC: {}'.format(roc_auc))\n",
    "        print('\\tPR AUC: {}'.format(pr_auc))\n",
    "        print('\\tF1: {}'.format(f1))\n",
    "        plot_pr_curve(precisions, recalls, curr_trial_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "TF_CNN_VALID_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/validation\"\n",
    "TF_GRU_VALID_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/validation\"\n",
    "TF_HUB_VALID_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/validation\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compute_metrics_from_dir(TF_CNN_VALID_RESULTS_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compute_metrics_from_dir(TF_GRU_VALID_RESULTS_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compute_metrics_from_dir(TF_HUB_VALID_RESULTS_DIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "TF_CNN_TEST_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/test\"\n",
    "TF_GRU_TEST_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/test\"\n",
    "TF_HUB_TEST_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/test\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compute_metrics_from_dir(TF_CNN_TEST_RESULTS_DIR, is_test=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compute_metrics_from_dir(TF_GRU_TEST_RESULTS_DIR, is_test=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compute_metrics_from_dir(TF_HUB_TEST_RESULTS_DIR)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: model_evaluation/input_fn_example.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines some examples of input_fn for the evaluation notebook."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import numpy as np
import pandas as pd
import pkg_resources
import os
import random
import re

import tensorflow as tf
from tensorflow.python.lib.io import file_io

from unintended_ml_bias import model_bias_analysis
from utils_export import utils_tfrecords

# Setting the GCS read cache size to 0 makes GCS file access faster; see
# https://github.com/tensorflow/tensorflow/issues/15530
os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'

# TODO(fprost): Clean up this file.

#### #### #### #### #### ####
#### PERFORMANCE DATASET ####
#### #### #### #### #### ####


def create_input_fn_toxicity_performance(tokenizer, model_input_comment_field):
  """Builds an input_fn that loads the toxicity performance test set.

  Args:
    tokenizer: Callable applied to each raw comment of the dataset.
    model_input_comment_field: Name of the column that receives the tokenizer
      output.

  Returns:
    A function taking `max_n_examples` and `random_filter_keep_rate` (both
    forwarded to `utils_tfrecords.decode_tf_records_to_pandas`) and returning
    a deep copy of the decoded records with two changes: the tokenized
    comments are stored under `model_input_comment_field`, and the `frac_neg`
    column is renamed to `label` and converted to booleans.
  """
  dataset_path = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'
  label_column = 'frac_neg'  # Name of the label in the dataset.
  comment_column = 'comment_text'  # Name of the comment in the dataset.

  # Spec used to decode the tf records: one string and one float per example.
  feature_spec = {}
  feature_spec[comment_column] = tf.FixedLenFeature([], dtype=tf.string)
  feature_spec[label_column] = tf.FixedLenFeature([], dtype=tf.float32)

  def input_fn_performance_toxicity(max_n_examples=None,
                                    random_filter_keep_rate=1.0):
    frame = utils_tfrecords.decode_tf_records_to_pandas(
        feature_spec, dataset_path, max_n_examples, random_filter_keep_rate)
    # The raw comment column is left in place; the tokenized version goes into
    # the column the model reads from.
    frame[model_input_comment_field] = [
        tokenizer(comment) for comment in frame[comment_column]
    ]
    frame = frame.rename(columns={label_column: 'label'})
    # NOTE(review): a score of exactly 0.5 is resolved by the interpreter's
    # `round` -- confirm that this is the intended tie handling.
    frame['label'] = [bool(round(score)) for score in frame['label']]
    return frame.copy(deep=True)

  return input_fn_performance_toxicity


#### #### #### #### ####
#### CIVIL DATASET  ####
#### #### #### #### ####

# Location of the civil comments records read by the input_fn builders below.
CIVIL_COMMENTS_PATH = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/eval-00000-of-00003.tfrecord'
# An identity column counts as present when its value is >= this threshold.
THRESHOLD_BIAS_CIVIL = 0.5

# Name of the comment column in the civil comments dataset.
CIVIL_COMMENT_NAME = 'comment_text'

# Identity columns of the civil comments dataset.
identity_terms_civil = [
    'male',
    'female',
    'transgender',
    'other_gender',
    'heterosexual',
    'homosexual_gay_or_lesbian',
    'bisexual',
    'other_sexual_orientation',
    'christian',
    'jewish',
    'muslim',
    'hindu',
    'buddhist',
    'atheist',
    'other_religion',
    'black',
    'white',
    'asian',
    'latino',
    'other_race_or_ethnicity',
    'physical_disability',
    'intellectual_or_learning_disability',
    'psychiatric_or_mental_illness',
    'other_disability',
]

# Decoding spec for the civil comments records. The columns listed here have
# no default value.
civil_comments_spec = {
    'comment_text': tf.FixedLenFeature([], dtype=tf.string),
    'id': tf.FixedLenFeature([], dtype=tf.string),
    'toxicity': tf.FixedLenFeature([], dtype=tf.float32),
    'severe_toxicity': tf.FixedLenFeature([], dtype=tf.float32),
    'obscene': tf.FixedLenFeature([], dtype=tf.float32),
    'sexual_explicit': tf.FixedLenFeature([], dtype=tf.float32),
    'identity_attack': tf.FixedLenFeature([], dtype=tf.float32),
    'insult': tf.FixedLenFeature([], dtype=tf.float32),
    'threat': tf.FixedLenFeature([], dtype=tf.float32),
    'toxicity_annotator_count': tf.FixedLenFeature([], dtype=tf.int64),
    'identity_annotator_count': tf.FixedLenFeature([], dtype=tf.int64),
}
# Every identity column is a float that defaults to -1. when absent from a
# record, so a negative value marks an example without identity labels.
civil_comments_spec.update(
    (term, tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.))
    for term in identity_terms_civil)


def create_input_fn_civil_performance(tokenizer, model_input_comment_field):
  """Builds the input_fn used to measure performance on the civil dataset.

  Args:
    tokenizer: Callable applied to every comment text.
    model_input_comment_field: Column name under which the tokenized text is
      exposed in the result.

  Returns:
    A function taking `max_n_examples` and `random_filter_keep_rate` (both
    forwarded to `utils_tfrecords.decode_tf_records_to_pandas`) and returning
    the decoded data with the tokenized text column and a boolean 'label'
    column.
  """

  def input_fn_performance_civil(max_n_examples=None,
                                 random_filter_keep_rate=1.0):
    frame = utils_tfrecords.decode_tf_records_to_pandas(
        civil_comments_spec,
        CIVIL_COMMENTS_PATH,
        max_n_examples=max_n_examples,
        random_filter_keep_rate=random_filter_keep_rate,
    )
    # Tokenize every comment in place.
    frame[CIVIL_COMMENT_NAME] = [
        tokenizer(comment) for comment in frame[CIVIL_COMMENT_NAME]
    ]
    # Binarize the toxicity score by rounding it.
    frame['toxicity'] = [bool(round(score)) for score in frame['toxicity']]
    column_mapping = {
        CIVIL_COMMENT_NAME: model_input_comment_field,
        'toxicity': 'label'
    }
    renamed = frame.rename(columns=column_mapping)
    return renamed.copy(deep=True)

  return input_fn_performance_civil


def create_input_fn_civil_bias(tokenizer, model_input_comment_field):
  """Generates an input_fn to evaluate model bias on civil dataset.

  Construction of this dataset:
      We keep only examples that have identity labels (with rule: male >= 0).
      We apply THRESHOLD_BIAS_CIVIL to each identity field.
      We select x% of the "background", i.e. examples that are below the
      threshold for every identity.

  Indeed, as the background is dominant, we want to reduce the size of the test
  set.

  Args:
    tokenizer: Callable applied to every comment text.
    model_input_comment_field: Column name under which the tokenized text is
      exposed in the result.

  Returns:
    A function taking `max_n_examples` and returning the decoded data with
    boolean identity columns and a boolean 'label' column.
  """

  def filter_fn_civil(example, background_filter_keep_rate=0.1):
    """Returns True if `example` should be kept in the bias dataset."""
    # A negative 'male' value marks an example without identity labels.
    if example['male'] < 0.:
      return False
    # Examples mentioning at least one identity are always kept.
    if any(example[_term] >= THRESHOLD_BIAS_CIVIL
           for _term in identity_terms_civil):
      return True
    # Background examples are randomly down-sampled.
    return random.random() < background_filter_keep_rate

  def input_fn_bias_civil(max_n_examples=None):
    civil_df_raw = utils_tfrecords.decode_tf_records_to_pandas(
        civil_comments_spec,
        CIVIL_COMMENTS_PATH,
        max_n_examples=max_n_examples,
        filter_fn=filter_fn_civil,
    )
    civil_df_raw[CIVIL_COMMENT_NAME] = list(
        map(tokenizer, civil_df_raw[CIVIL_COMMENT_NAME]))
    # Turn each identity score into a boolean membership flag.
    for _term in identity_terms_civil:
      civil_df_raw[_term] = list(
          map(lambda x: x >= THRESHOLD_BIAS_CIVIL, list(civil_df_raw[_term])))
    # Binarize the toxicity score by rounding it.
    civil_df_raw['toxicity'] = list(
        map(lambda x: bool(round(x)), list(civil_df_raw['toxicity'])))
    civil_df_raw = civil_df_raw.rename(columns={
        CIVIL_COMMENT_NAME: model_input_comment_field,
        'toxicity': 'label'
    })
    res = civil_df_raw.copy(deep=True)
    return res

  return input_fn_bias_civil


#### #### #### #### #### ####
####  SYNTHETIC DATASET  ####
#### #### #### #### #### ####


def create_input_fn_artificial_bias(tokenizer, model_input_comment_field):
  """Generates an input_fn to evaluate model bias on synthetic dataset.

  NOTE(review): a second definition with the same name appears later in this
  module and replaces this one at import time.

  Args:
    tokenizer: Callable applied to every raw text.
    model_input_comment_field: Column name the 'raw_text' column is renamed to
      in the result.

  Returns:
    A function taking `max_n_examples` (None or 0 keeps every example) and
    returning a pandas DataFrame.
  """

  def input_fn_bias(max_n_examples=None):

    # Load the madlibs evaluation set shipped with the unintended_ml_bias
    # package.
    entire_test_bias_df = pd.read_csv(
        pkg_resources.resource_stream('unintended_ml_bias',
                                      'eval_datasets/bias_madlibs_77k.csv'))
    entire_test_bias_df['raw_text'] = entire_test_bias_df['Text']
    # The label is True for examples marked 'BAD'.
    entire_test_bias_df['label'] = [
        label == 'BAD' for label in entire_test_bias_df['Label']
    ]
    entire_test_bias_df = entire_test_bias_df[['raw_text', 'label']].copy()
    identity_terms_synthetic = [
        line.strip() for line in pkg_resources.resource_stream(
            'unintended_ml_bias', 'bias_madlibs_data/adjectives_people.txt')
    ]
    model_bias_analysis.add_subgroup_columns_from_text(
        entire_test_bias_df, 'raw_text', identity_terms_synthetic)

    # Add preprocessing
    # NOTE(review): the tokenized text is stored under 'text', while it is the
    # untokenized 'raw_text' column that gets renamed to the model input field
    # below -- confirm this is intended.
    entire_test_bias_df['text'] = list(
        map(tokenizer, entire_test_bias_df['raw_text']))
    if max_n_examples:
      # Fixed seed so that the same subset is drawn on every call.
      res = entire_test_bias_df.sample(n=max_n_examples, random_state=2018)
    else:
      res = entire_test_bias_df
    res = res.copy(deep=True)
    res = res.rename(columns={'raw_text': model_input_comment_field})
    return res

  return input_fn_bias

#### #### #### #### #### ####
####  BIASBIOS DATASET   ####
#### #### #### #### #### ####

# File patterns of the biasbios test tfrecords. create_input_fn_biasbios reads
# from SCRUBBED_BIASBIOS_PATH when called with scrubbed=True, and from
# BIASBIOS_PATH otherwise.
BIASBIOS_PATH = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190225173815/test*.tfrecord'
SCRUBBED_BIASBIOS_PATH = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190225173815_scrubbed/test*.tfrecord'

# Feature spec used to decode the biasbios tfrecords.
comments_spec = {
    'comment_text':
        tf.FixedLenFeature([], dtype=tf.string),
    'gender':
        tf.FixedLenFeature([], dtype=tf.string),
    'title':
        tf.FixedLenFeature([], dtype=tf.int64)
}

# Identity column(s) of the biasbios records.
identity_terms = [
    'gender'
]

# Columns that create_input_fn_biasbios renames to the model input field and
# to 'label', respectively.
COMMENT_NAME = 'comment_text'
LABEL_NAME = 'title'


def create_input_fn_biasbios(tokenizer, model_input_comment_field, scrubbed=False):
  """Generates an input_fn to evaluate model bias on biasbios dataset.

  Args:
    tokenizer: Callable applied to every comment text.
    model_input_comment_field: Column name under which the tokenized text is
      exposed in the result.
    scrubbed: If True, reads from SCRUBBED_BIASBIOS_PATH instead of
      BIASBIOS_PATH.

  Returns:
    A function taking `max_n_examples` and `random_filter_keep_rate` and
    returning the decoded data, with the tokenized text column and the
    LABEL_NAME column renamed to 'label'.
  """

  def filter_fn_biasbios(example, background_filter_keep_rate=1.0):
    """Keeps an example with probability `background_filter_keep_rate`."""
    return (random.random() < background_filter_keep_rate)

  def input_fn_biasbios(max_n_examples=None, random_filter_keep_rate=1.0):
    path = SCRUBBED_BIASBIOS_PATH if scrubbed else BIASBIOS_PATH

    # Bind the requested keep rate; passing filter_fn_biasbios directly would
    # always use its default rate of 1.0 and silently ignore the argument.
    def keep_example(example):
      return filter_fn_biasbios(example, random_filter_keep_rate)

    df_raw = utils_tfrecords.decode_tf_records_to_pandas(
        comments_spec,
        path,
        max_n_examples=max_n_examples,
        filter_fn=keep_example,
    )
    df_raw[COMMENT_NAME] = list(
        map(tokenizer, df_raw[COMMENT_NAME]))
    df_raw = df_raw.rename(columns={
        COMMENT_NAME: model_input_comment_field,
        LABEL_NAME: 'label'
    })
    res = df_raw.copy(deep=True)
    return res

  return input_fn_biasbios

#### #### #### #### #### ####
####  SYNTHETIC DATASET  ####
#### #### #### #### #### ####


def create_input_fn_artificial_bias(tokenizer, model_input_comment_field):
  """Generates an input_fn to evaluate model bias on synthetic dataset.

  NOTE(review): an earlier definition with the same name exists above in this
  module; this later one is the definition that stays bound after import.

  Args:
    tokenizer: Callable applied to every raw text.
    model_input_comment_field: Column name the 'raw_text' column is renamed to
      in the result.

  Returns:
    A function taking `max_n_examples` (None or 0 keeps every example) and
    returning a pandas DataFrame.
  """

  def input_fn_bias(max_n_examples=None):

    # Load the madlibs evaluation set shipped with the unintended_ml_bias
    # package.
    entire_test_bias_df = pd.read_csv(
        pkg_resources.resource_stream('unintended_ml_bias',
                                      'eval_datasets/bias_madlibs_77k.csv'))
    entire_test_bias_df['raw_text'] = entire_test_bias_df['Text']
    # The label is True for examples marked 'BAD'.
    entire_test_bias_df['label'] = [
        label == 'BAD' for label in entire_test_bias_df['Label']
    ]
    entire_test_bias_df = entire_test_bias_df[['raw_text', 'label']].copy()
    identity_terms_synthetic = [
        line.strip() for line in pkg_resources.resource_stream(
            'unintended_ml_bias', 'bias_madlibs_data/adjectives_people.txt')
    ]
    model_bias_analysis.add_subgroup_columns_from_text(
        entire_test_bias_df, 'raw_text', identity_terms_synthetic)

    # Add preprocessing
    # NOTE(review): the tokenized text is stored under 'text', while it is the
    # untokenized 'raw_text' column that gets renamed to the model input field
    # below -- confirm this is intended.
    entire_test_bias_df['text'] = list(
        map(tokenizer, entire_test_bias_df['raw_text']))
    if max_n_examples:
      # Fixed seed so that the same subset is drawn on every call.
      res = entire_test_bias_df.sample(n=max_n_examples, random_state=2018)
    else:
      res = entire_test_bias_df
    res = res.copy(deep=True)
    res = res.rename(columns={'raw_text': model_input_comment_field})
    return res

  return input_fn_bias

================================================
FILE: model_evaluation/jigsaw_evaluation_pipeline.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "-YibCLoSLRHp"
   },
   "source": [
    "Copyright 2018 Google LLC.\n",
    "\n",
    "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    "you may not use this file except in compliance with the License.\n",
    "You may obtain a copy of the License at\n",
    "\n",
    "https://www.apache.org/licenses/LICENSE-2.0\n",
    "\n",
    "Unless required by applicable law or agreed to in writing, software\n",
    "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "See the License for the specific language governing permissions and\n",
    "limitations under the License."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "LMykUGMauh9b"
   },
   "source": [
    "# Evaluation code\n",
    "\n",
    "\n",
    "__Disclaimer__\n",
    "*   This notebook contains experimental code, which may be changed without notice.\n",
    "*   The ideas here are some ideas relevant to fairness - they are not the whole story!\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Notebook summary"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook intends to evaluate a list of models on two dimensions:\n",
    "- \"Performance\": How well the model performs at classifying the data (intended bias). Currently, we use the AUC.\n",
    "- \"Bias\": How much bias the model contains (unintended bias). Currently, we use the pinned AUC.\n",
    "\n",
    "This script takes the following steps:\n",
    "\n",
    "- Defines the models to evaluate and specifies their signature (expected inputs/outputs).\n",
    "- Writes input functions to generate 2 datasets:\n",
    "    - A \"performance dataset\" which will be used for the first set of metrics. This dataset is supposed to have a format similar to the training data (it contains a piece of text and a label).\n",
    "    - A \"bias dataset\" which will be used for the second set of metrics. This data contains a piece of text and a label, but also some subgroup information on which to evaluate the unintended bias.\n",
    "- Runs predictions with the export_utils.\n",
    "- Evaluates metrics."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import getpass\n",
    "from IPython.display import display\n",
    "import json\n",
    "import nltk\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pkg_resources\n",
    "import os\n",
    "import random\n",
    "import re\n",
    "import seaborn as sns\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow.python.lib.io import file_io"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#from google.colab import auth\n",
    "#auth.authenticate_user()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install -U -q git+https://github.com/conversationai/unintended-ml-bias-analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from unintended_ml_bias import model_bias_analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import input_fn_example\n",
    "from utils_export.dataset import Dataset, Model\n",
    "from utils_export import utils_cloudml\n",
    "from utils_export import utils_tfrecords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0' #Faster to access GCS file + https://github.com/tensorflow/tensorflow/issues/15530"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to /Users/nthain/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nltk.download('punkt')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Settings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Global variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User inputs\n",
    "PROJECT_NAME = 'conversationai-models'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 1: Defining your model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An important user input is the description of the deployed models that are evaluated.\n",
    "\n",
    "1- Defining which model will be used.\n",
    "$MODEL_NAMES defines the different model names (format: \"model_name:version\").\n",
    "\n",
    "2- Defining the model signature.\n",
    "Currently, the `Dataset` API does not detect the signature of a CMLE model, so this information is given by a `Model` instance.\n",
    "You need to describe:\n",
    "- input_spec: what the input_file should be (argument `feature_keys_spec`). It is a dictionary which describes the name of the fields and their types.\n",
    "- prediction_keys (argument `prediction_keys`). It is the name of the prediction field in the model output.\n",
    "- Name of the example key (argument `example_key`). A unique identifier for each sentence which will be generated by the dataset API (i.e., your input data does not need to have this field).\n",
    "    - When using Cloud MLE for batch predictions, data is processed in an unpredictable order. To be able to match the returned predictions with your input instances, you must have instance keys defined."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User inputs:\n",
    "MODEL_NAMES = [\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738', # ??\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748', # ??\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820', # ??\n",
    "    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828', # ??\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User inputs: Model description (see above for more info).\n",
    "TEXT_FEATURE_NAME = 'tokens' #Input defined in serving function called in run.py (arg: `text_feature_name`).\n",
    "SENTENCE_KEY = 'comment_key' #Input key defined in serving function called in run.py (arg: `example_key_name`).\n",
    "#LABEL_NAME_PREDICTION_MODEL = 'scores' # Output prediction: typically $label_name/logistic\n",
    "LABEL_NAME_PREDICTION_MODEL = 'probabilities' # Output prediction: typically $label_name/logistic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_input_spec = {\n",
    "    TEXT_FEATURE_NAME: utils_tfrecords.EncodingFeatureSpec.LIST_STRING} #library will use this automatically\n",
    "\n",
    "model = Model(\n",
    "    feature_keys_spec=model_input_spec,\n",
    "    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n",
    "    example_key=SENTENCE_KEY,\n",
    "    model_names=MODEL_NAMES,\n",
    "    project_name=PROJECT_NAME)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 2: Defining the input_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tokenizer(text, lowercase=True):\n",
    "  \"\"\"Converts text to a list of words.\n",
    "\n",
    "  Args:\n",
    "    text: piece of text to tokenize (string).\n",
    "    lowercase: whether to include lowercasing in preprocessing (boolean).\n",
    "    tokenizer: Python function to tokenize the text on.\n",
    "\n",
    "  Returns:\n",
    "    A list of strings (words).\n",
    "  \"\"\"\n",
    "  words = nltk.word_tokenize(text.decode('utf-8'))\n",
    "  if lowercase:\n",
    "    words = [w.lower() for w in words]\n",
    "  return words"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Defining input_fn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We first need to define some input_fn which will be fed to the `Dataset` API.\n",
    "An input_fn must satisfy the following requirements:\n",
    "- Returns a pandas DataFrame.\n",
    "- Has an argument 'max_n_examples' to control the size of the dataframe.\n",
    "- The returned DataFrame contains at least a field $TEXT_FEATURE_NAME, which maps to a tokenized text (list of words), AND a field 'label' which is 1 for toxic (0 otherwise).\n",
    "\n",
    "We will define two different input_fn (1 for performance, 1 for bias). The bias input_fn should also contain identity information.\n",
    "\n",
    "Note: You can use ANY input_fn that matches those requirements. You can find a few examples of input_fn in the file input_fn_example.py (for toxicity and civil_comments dataset)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User inputs: Choose which one you want to use OR create your own!\n",
    "INPUT_FN_PERFORMANCE = input_fn_example.create_input_fn_biasbios(\n",
    "    tokenizer,\n",
    "    model_input_comment_field=TEXT_FEATURE_NAME,\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 3: Running prediction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Performance dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User inputs\n",
    "SIZE_PERFORMANCE_DATA_SET = 10000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gs://conversationai-models/nthain/tfrecords/performance_dataset_dir\n"
     ]
    }
   ],
   "source": [
    "# Pattern for path of tf_records\n",
    "PERFORMANCE_DATASET_DIR = os.path.join(\n",
    "    'gs://conversationai-models/',\n",
    "    getpass.getuser(),\n",
    "    'tfrecords',\n",
    "    'performance_dataset_dir')\n",
    "print(PERFORMANCE_DATASET_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:input_fn is compatible with the `Dataset` class.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/tensorflow/python/client/session.py:1711: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).\n",
      "  warnings.warn('An interactive session is already active. This can '\n"
     ]
    }
   ],
   "source": [
    "dataset_performance = Dataset(INPUT_FN_PERFORMANCE, PERFORMANCE_DATASET_DIR)\n",
    "random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future\n",
    "dataset_performance.load_data(SIZE_PERFORMANCE_DATA_SET, random_filter_keep_rate=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tokens</th>\n",
       "      <th>gender</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[in, her, role, ,, she, is, a, member, of, an,...</td>\n",
       "      <td>F</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[his, blog, www.donaldhtaylorjr.blogspot.com, ...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[he, has, primarily, reported, for, the, atlan...</td>\n",
       "      <td>M</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[andrea, 's, area, of, expertise, is, in, whol...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[dr., milane, was, trained, as, a, national, c...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[he, is, also, visiting, associate, professor,...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>[her, research, focuses, on, the, trafficking,...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[he, has, been, licensed, to, practice, law, i...</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[after, a, two-year, postdoctoral, fellowship,...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[prior, to, teaching, ,, she, was, an, account...</td>\n",
       "      <td>F</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>[jackie, 's, works, are, published, in, academ...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>[her, research, topic, was, the, investigation...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>[she, graduated, with, honors, in, 2012, ., ha...</td>\n",
       "      <td>F</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>[his, research, focuses, on, the, japan, air, ...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>[she, directed, the, 2014, peabody, award-winn...</td>\n",
       "      <td>F</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>[he, lends, his, exceptional, surgical, skills...</td>\n",
       "      <td>M</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>[he, teaches, courses, ranging, from, core, un...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>[her, major, fields, of, interest, are, develo...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>[dr., cole, honors, several, insurance, carrie...</td>\n",
       "      <td>M</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>[she, practices, in, the, areas, of, business,...</td>\n",
       "      <td>F</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>[she, has, obtained, her, phd, in, eu, law, fr...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>[his, photographs, are, reminiscent, of, silho...</td>\n",
       "      <td>M</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>[he, earned, his, ph.d., at, the, university, ...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>[his, inter-, disciplinary, research, interest...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>[she, earned, her, ph.d., in, communication, s...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>[his, current, projects, examine, intergenerat...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>[he, has, served, as, an, expert, witness, in,...</td>\n",
       "      <td>M</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>[she, 's, called, in, some, of, the, parent, o...</td>\n",
       "      <td>F</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>[nneka, has, recently, become, interested, in,...</td>\n",
       "      <td>F</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>[she, writes, regularly, for, faith, and, lead...</td>\n",
       "      <td>F</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9970</th>\n",
       "      <td>[he, was, previously, an, assistant, professor...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9971</th>\n",
       "      <td>[aside, from, filmmaking, ,, he, ’, s, an, avi...</td>\n",
       "      <td>M</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9972</th>\n",
       "      <td>[he, lives, in, dallas, with, his, wife, and, ...</td>\n",
       "      <td>M</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9973</th>\n",
       "      <td>[he, exhibited, in, institutions, like, kultur...</td>\n",
       "      <td>M</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9974</th>\n",
       "      <td>[he, has, represented, numerous, municipalitie...</td>\n",
       "      <td>M</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9975</th>\n",
       "      <td>[his, works, include, portrait, ,, glamour, an...</td>\n",
       "      <td>M</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9976</th>\n",
       "      <td>[he, began, using, haskell, during, his, senio...</td>\n",
       "      <td>M</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9977</th>\n",
       "      <td>[he, has, been, involved, with, streaming, med...</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9978</th>\n",
       "      <td>[he, has, also, produced, lecture, courses, fo...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9979</th>\n",
       "      <td>[after, completing, her, degrees, at, the, uni...</td>\n",
       "      <td>F</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9980</th>\n",
       "      <td>[this, is, a, slightly, edited, version, of, h...</td>\n",
       "      <td>F</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9981</th>\n",
       "      <td>[she, received, her, b.sc, ., in, nutrition, f...</td>\n",
       "      <td>F</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9982</th>\n",
       "      <td>[she, is, the, author, of, pelo, bueno, y, otr...</td>\n",
       "      <td>F</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9983</th>\n",
       "      <td>[she, obtained, her, bachelor, of, science, de...</td>\n",
       "      <td>F</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9984</th>\n",
       "      <td>[dr., kanchan, singh, practices, at, singh, de...</td>\n",
       "      <td>M</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9985</th>\n",
       "      <td>[prior, to, joining, fresh, 'n, fit, cuisine, ...</td>\n",
       "      <td>F</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9986</th>\n",
       "      <td>[he, worked, on, staff, at, aopa, pilot, magaz...</td>\n",
       "      <td>M</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9987</th>\n",
       "      <td>[he, started, working, on, these, themes, duri...</td>\n",
       "      <td>M</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9988</th>\n",
       "      <td>[his, research, aims, to, understand, the, con...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9989</th>\n",
       "      <td>[he, received, the, ph.d., degree, in, measuri...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9990</th>\n",
       "      <td>[he, currently, practices, at, johns, hopkins,...</td>\n",
       "      <td>M</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9991</th>\n",
       "      <td>[she, received, her, m.a, ., in, secondary, ed...</td>\n",
       "      <td>F</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9992</th>\n",
       "      <td>[his, research, interests, lie, in, the, study...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9993</th>\n",
       "      <td>[she, graduated, with, honors, in, 2000, ., ha...</td>\n",
       "      <td>F</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9994</th>\n",
       "      <td>[chris, primarily, teaches, anatomy, and, phys...</td>\n",
       "      <td>M</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>[always, responsive, to, the, specific, geogra...</td>\n",
       "      <td>F</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>[he, has, worked, on, numerous, projects, that...</td>\n",
       "      <td>M</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>[he, graduated, from, the, academy, of, visual...</td>\n",
       "      <td>M</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>[most, of, his, writing, is, from, the, middle...</td>\n",
       "      <td>M</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>[he, is, currently, on, the, good, news, poetr...</td>\n",
       "      <td>M</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 tokens gender  label\n",
       "0     [in, her, role, ,, she, is, a, member, of, an,...      F     17\n",
       "1     [his, blog, www.donaldhtaylorjr.blogspot.com, ...      M     25\n",
       "2     [he, has, primarily, reported, for, the, atlan...      M     12\n",
       "3     [andrea, 's, area, of, expertise, is, in, whol...      F     25\n",
       "4     [dr., milane, was, trained, as, a, national, c...      F     25\n",
       "5     [he, is, also, visiting, associate, professor,...      M     25\n",
       "6     [her, research, focuses, on, the, trafficking,...      F     25\n",
       "7     [he, has, been, licensed, to, practice, law, i...      M      3\n",
       "8     [after, a, two-year, postdoctoral, fellowship,...      M     25\n",
       "9     [prior, to, teaching, ,, she, was, an, account...      F     31\n",
       "10    [jackie, 's, works, are, published, in, academ...      F     25\n",
       "11    [her, research, topic, was, the, investigation...      F     25\n",
       "12    [she, graduated, with, honors, in, 2012, ., ha...      F     17\n",
       "13    [his, research, focuses, on, the, japan, air, ...      M     25\n",
       "14    [she, directed, the, 2014, peabody, award-winn...      F     10\n",
       "15    [he, lends, his, exceptional, surgical, skills...      M     30\n",
       "16    [he, teaches, courses, ranging, from, core, un...      M     25\n",
       "17    [her, major, fields, of, interest, are, develo...      F     25\n",
       "18    [dr., cole, honors, several, insurance, carrie...      M     23\n",
       "19    [she, practices, in, the, areas, of, business,...      F      3\n",
       "20    [she, has, obtained, her, phd, in, eu, law, fr...      F     25\n",
       "21    [his, photographs, are, reminiscent, of, silho...      M     22\n",
       "22    [he, earned, his, ph.d., at, the, university, ...      M     25\n",
       "23    [his, inter-, disciplinary, research, interest...      M     25\n",
       "24    [she, earned, her, ph.d., in, communication, s...      F     25\n",
       "25    [his, current, projects, examine, intergenerat...      M     25\n",
       "26    [he, has, served, as, an, expert, witness, in,...      M      0\n",
       "27    [she, 's, called, in, some, of, the, parent, o...      F     31\n",
       "28    [nneka, has, recently, become, interested, in,...      F      3\n",
       "29    [she, writes, regularly, for, faith, and, lead...      F     20\n",
       "...                                                 ...    ...    ...\n",
       "9970  [he, was, previously, an, assistant, professor...      M     25\n",
       "9971  [aside, from, filmmaking, ,, he, ’, s, an, avi...      M     10\n",
       "9972  [he, lives, in, dallas, with, his, wife, and, ...      M     29\n",
       "9973  [he, exhibited, in, institutions, like, kultur...      M     22\n",
       "9974  [he, has, represented, numerous, municipalitie...      M      3\n",
       "9975  [his, works, include, portrait, ,, glamour, an...      M     22\n",
       "9976  [he, began, using, haskell, during, his, senio...      M     29\n",
       "9977  [he, has, been, involved, with, streaming, med...      M      2\n",
       "9978  [he, has, also, produced, lecture, courses, fo...      M     25\n",
       "9979  [after, completing, her, degrees, at, the, uni...      F     23\n",
       "9980  [this, is, a, slightly, edited, version, of, h...      F     12\n",
       "9981  [she, received, her, b.sc, ., in, nutrition, f...      F      8\n",
       "9982  [she, is, the, author, of, pelo, bueno, y, otr...      F     24\n",
       "9983  [she, obtained, her, bachelor, of, science, de...      F     23\n",
       "9984  [dr., kanchan, singh, practices, at, singh, de...      M     30\n",
       "9985  [prior, to, joining, fresh, 'n, fit, cuisine, ...      F      8\n",
       "9986  [he, worked, on, staff, at, aopa, pilot, magaz...      M     12\n",
       "9987  [he, started, working, on, these, themes, duri...      M     18\n",
       "9988  [his, research, aims, to, understand, the, con...      M     25\n",
       "9989  [he, received, the, ph.d., degree, in, measuri...      M     25\n",
       "9990  [he, currently, practices, at, johns, hopkins,...      M     30\n",
       "9991  [she, received, her, m.a, ., in, secondary, ed...      F     31\n",
       "9992  [his, research, interests, lie, in, the, study...      M     25\n",
       "9993  [she, graduated, with, honors, in, 2000, ., ha...      F     26\n",
       "9994  [chris, primarily, teaches, anatomy, and, phys...      M     31\n",
       "9995  [always, responsive, to, the, specific, geogra...      F      2\n",
       "9996  [he, has, worked, on, numerous, projects, that...      M     29\n",
       "9997  [he, graduated, from, the, academy, of, visual...      M     22\n",
       "9998  [most, of, his, writing, is, from, the, middle...      M     12\n",
       "9999  [he, is, currently, on, the, good, news, poetr...      M     24\n",
       "\n",
       "[10000 rows x 3 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset_performance.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000, 3)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset_performance.show_data().shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'tokens', u'gender', u'label'], dtype='object')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset_performance.show_data().columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "CLASS_NAMES = range(33)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features {\n",
      "  feature {\n",
      "    key: \"comment_text\"\n",
      "    value {\n",
      "      bytes_list {\n",
      "        value: \" In her role, she is a member of an innovative team-based care model which has been recognized by Wall Street Journal and the Robert Wood Johnson Foundation. A process improvement leader with a passion for serving vulnerable populations, Amberly was recognized by her colleagues with the first Daisy Award for Extraordinary Nurses at Cambridge Health Alliance. Amberly holds a BS in Nursing from Valparaiso University and a Masters in Public Health from the University of Massachusetts Amherst. read more\"\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "  feature {\n",
      "    key: \"gender\"\n",
      "    value {\n",
      "      bytes_list {\n",
      "        value: \"F\"\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "  feature {\n",
      "    key: \"title\"\n",
      "    value {\n",
      "      int64_list {\n",
      "        value: 17\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "INPUT_DATA = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190220165938/eval-00000-of-00003.tfrecord'\n",
    "record_iterator = tf.python_io.tf_record_iterator(path=INPUT_DATA)\n",
    "string_record = next(record_iterator)\n",
    "example = tf.train.Example()\n",
    "example.ParseFromString(string_record)\n",
    "text = example.features.feature\n",
    "print(example)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Model is compatible with the `Dataset` instance.\n",
      "WARNING:tensorflow:Using past predictions. the data must match exactly (same number of lines and same order).\n"
     ]
    }
   ],
   "source": [
    "# Set recompute_predictions=False to save time if predictions are available.\n",
    "dataset_performance.add_model_prediction_to_data(model, recompute_predictions=False, class_names=CLASS_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gs://conversationai-models/nthain/tfrecords/performance_dataset_dir/prediction_data_tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738\n",
      "33\n"
     ]
    }
   ],
   "source": [
    "def _load_predictions(pred_file):\n",
    "    \"\"\"Read a newline-delimited JSON prediction file into a list.\n",
    "\n",
    "    Args:\n",
    "      pred_file: Path of the prediction file, opened with `file_io.FileIO`.\n",
    "\n",
    "    Returns:\n",
    "      A list with one `json.loads` result per line of the file, or an\n",
    "      empty list if reading or parsing any line fails.\n",
    "    \"\"\"\n",
    "    with file_io.FileIO(pred_file, 'r') as f:\n",
    "      # prediction file needs to fit in memory.\n",
    "      try:\n",
    "        predictions = [json.loads(line) for line in f]\n",
    "      except Exception:\n",
    "        # Best-effort load: fall back to an empty list on ordinary errors.\n",
    "        # `Exception` (rather than a bare `except:`) lets KeyboardInterrupt\n",
    "        # and SystemExit propagate instead of being swallowed.\n",
    "        predictions = []\n",
    "    return predictions\n",
    "\n",
    "model_name_tmp = MODEL_NAMES[0]\n",
    "prediction_file = dataset_performance.get_path_prediction(model_name_tmp)\n",
    "print(prediction_file)\n",
    "prediction_file = os.path.join(prediction_file,\n",
    "                                 'prediction.results-00000-of-00001')\n",
    "print(len(_load_predictions(prediction_file)[0]['probabilities']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Post-processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_performance_df = dataset_performance.show_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_bias_df = test_performance_df.copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Analyzing final results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 17,
     "status": "ok",
     "timestamp": 1530641283264,
     "user": {
      "displayName": "Flavien Prost",
      "photoUrl": "//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg",
      "userId": "100080410554240838905"
     },
     "user_tz": 240
    },
    "id": "Y7R4heIB5GaV",
    "outputId": "e8e0c3bc-96d8-4635-865a-275052054df8"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tokens</th>\n",
       "      <th>gender</th>\n",
       "      <th>label</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6</th>\n",
       "      <th>...</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[in, her, role, ,, she, is, a, member, of, an,...</td>\n",
       "      <td>F</td>\n",
       "      <td>17</td>\n",
       "      <td>0.001687</td>\n",
       "      <td>1.814099e-11</td>\n",
       "      <td>0.002681</td>\n",
       "      <td>0.009853</td>\n",
       "      <td>0.004227</td>\n",
       "      <td>0.055716</td>\n",
       "      <td>0.003005</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003351</td>\n",
       "      <td>0.013561</td>\n",
       "      <td>0.002040</td>\n",
       "      <td>0.001682</td>\n",
       "      <td>4.412969e-04</td>\n",
       "      <td>6.086852e-17</td>\n",
       "      <td>0.001606</td>\n",
       "      <td>0.001379</td>\n",
       "      <td>0.014635</td>\n",
       "      <td>0.000032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[his, blog, www.donaldhtaylorjr.blogspot.com, ...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "      <td>0.014774</td>\n",
       "      <td>2.716771e-13</td>\n",
       "      <td>0.005496</td>\n",
       "      <td>0.022347</td>\n",
       "      <td>0.003845</td>\n",
       "      <td>0.084480</td>\n",
       "      <td>0.000096</td>\n",
       "      <td>...</td>\n",
       "      <td>0.010309</td>\n",
       "      <td>0.001055</td>\n",
       "      <td>0.001062</td>\n",
       "      <td>0.006205</td>\n",
       "      <td>9.439933e-07</td>\n",
       "      <td>5.250679e-18</td>\n",
       "      <td>0.001204</td>\n",
       "      <td>0.000150</td>\n",
       "      <td>0.015252</td>\n",
       "      <td>0.000779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[he, has, primarily, reported, for, the, atlan...</td>\n",
       "      <td>M</td>\n",
       "      <td>12</td>\n",
       "      <td>0.016779</td>\n",
       "      <td>8.870694e-16</td>\n",
       "      <td>0.001688</td>\n",
       "      <td>0.071343</td>\n",
       "      <td>0.000560</td>\n",
       "      <td>0.029823</td>\n",
       "      <td>0.000032</td>\n",
       "      <td>...</td>\n",
       "      <td>0.018767</td>\n",
       "      <td>0.022292</td>\n",
       "      <td>0.077598</td>\n",
       "      <td>0.033979</td>\n",
       "      <td>8.196229e-05</td>\n",
       "      <td>3.315851e-11</td>\n",
       "      <td>0.007313</td>\n",
       "      <td>0.002565</td>\n",
       "      <td>0.118167</td>\n",
       "      <td>0.001603</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[andrea, 's, area, of, expertise, is, in, whol...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "      <td>0.017742</td>\n",
       "      <td>1.019689e-15</td>\n",
       "      <td>0.017150</td>\n",
       "      <td>0.052085</td>\n",
       "      <td>0.002097</td>\n",
       "      <td>0.052322</td>\n",
       "      <td>0.002627</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001580</td>\n",
       "      <td>0.145462</td>\n",
       "      <td>0.000637</td>\n",
       "      <td>0.000337</td>\n",
       "      <td>3.909138e-04</td>\n",
       "      <td>1.304484e-21</td>\n",
       "      <td>0.011515</td>\n",
       "      <td>0.000922</td>\n",
       "      <td>0.029867</td>\n",
       "      <td>0.000001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[dr., milane, was, trained, as, a, national, c...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "      <td>0.015531</td>\n",
       "      <td>1.783027e-12</td>\n",
       "      <td>0.196227</td>\n",
       "      <td>0.016471</td>\n",
       "      <td>0.002690</td>\n",
       "      <td>0.000040</td>\n",
       "      <td>0.001384</td>\n",
       "      <td>...</td>\n",
       "      <td>0.013445</td>\n",
       "      <td>0.003754</td>\n",
       "      <td>0.220090</td>\n",
       "      <td>0.081232</td>\n",
       "      <td>7.920414e-05</td>\n",
       "      <td>2.406181e-13</td>\n",
       "      <td>0.150817</td>\n",
       "      <td>0.014913</td>\n",
       "      <td>0.071632</td>\n",
       "      <td>0.000142</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 135 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              tokens gender  label  \\\n",
       "0  [in, her, role, ,, she, is, a, member, of, an,...      F     17   \n",
       "1  [his, blog, www.donaldhtaylorjr.blogspot.com, ...      M     25   \n",
       "2  [he, has, primarily, reported, for, the, atlan...      M     12   \n",
       "3  [andrea, 's, area, of, expertise, is, in, whol...      F     25   \n",
       "4  [dr., milane, was, trained, as, a, national, c...      F     25   \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0  \\\n",
       "0                                           0.001687                           \n",
       "1                                           0.014774                           \n",
       "2                                           0.016779                           \n",
       "3                                           0.017742                           \n",
       "4                                           0.015531                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1  \\\n",
       "0                                       1.814099e-11                           \n",
       "1                                       2.716771e-13                           \n",
       "2                                       8.870694e-16                           \n",
       "3                                       1.019689e-15                           \n",
       "4                                       1.783027e-12                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2  \\\n",
       "0                                           0.002681                           \n",
       "1                                           0.005496                           \n",
       "2                                           0.001688                           \n",
       "3                                           0.017150                           \n",
       "4                                           0.196227                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3  \\\n",
       "0                                           0.009853                           \n",
       "1                                           0.022347                           \n",
       "2                                           0.071343                           \n",
       "3                                           0.052085                           \n",
       "4                                           0.016471                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4  \\\n",
       "0                                           0.004227                           \n",
       "1                                           0.003845                           \n",
       "2                                           0.000560                           \n",
       "3                                           0.002097                           \n",
       "4                                           0.002690                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5  \\\n",
       "0                                           0.055716                           \n",
       "1                                           0.084480                           \n",
       "2                                           0.029823                           \n",
       "3                                           0.052322                           \n",
       "4                                           0.000040                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6  \\\n",
       "0                                           0.003005                           \n",
       "1                                           0.000096                           \n",
       "2                                           0.000032                           \n",
       "3                                           0.002627                           \n",
       "4                                           0.001384                           \n",
       "\n",
       "                                      ...                                      \\\n",
       "0                                     ...                                       \n",
       "1                                     ...                                       \n",
       "2                                     ...                                       \n",
       "3                                     ...                                       \n",
       "4                                     ...                                       \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23  \\\n",
       "0                                           0.003351                            \n",
       "1                                           0.010309                            \n",
       "2                                           0.018767                            \n",
       "3                                           0.001580                            \n",
       "4                                           0.013445                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24  \\\n",
       "0                                           0.013561                            \n",
       "1                                           0.001055                            \n",
       "2                                           0.022292                            \n",
       "3                                           0.145462                            \n",
       "4                                           0.003754                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25  \\\n",
       "0                                           0.002040                            \n",
       "1                                           0.001062                            \n",
       "2                                           0.077598                            \n",
       "3                                           0.000637                            \n",
       "4                                           0.220090                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26  \\\n",
       "0                                           0.001682                            \n",
       "1                                           0.006205                            \n",
       "2                                           0.033979                            \n",
       "3                                           0.000337                            \n",
       "4                                           0.081232                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27  \\\n",
       "0                                       4.412969e-04                            \n",
       "1                                       9.439933e-07                            \n",
       "2                                       8.196229e-05                            \n",
       "3                                       3.909138e-04                            \n",
       "4                                       7.920414e-05                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28  \\\n",
       "0                                       6.086852e-17                            \n",
       "1                                       5.250679e-18                            \n",
       "2                                       3.315851e-11                            \n",
       "3                                       1.304484e-21                            \n",
       "4                                       2.406181e-13                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29  \\\n",
       "0                                           0.001606                            \n",
       "1                                           0.001204                            \n",
       "2                                           0.007313                            \n",
       "3                                           0.011515                            \n",
       "4                                           0.150817                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30  \\\n",
       "0                                           0.001379                            \n",
       "1                                           0.000150                            \n",
       "2                                           0.002565                            \n",
       "3                                           0.000922                            \n",
       "4                                           0.014913                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31  \\\n",
       "0                                           0.014635                            \n",
       "1                                           0.015252                            \n",
       "2                                           0.118167                            \n",
       "3                                           0.029867                            \n",
       "4                                           0.071632                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32  \n",
       "0                                           0.000032                           \n",
       "1                                           0.000779                           \n",
       "2                                           0.001603                           \n",
       "3                                           0.000001                           \n",
       "4                                           0.000142                           \n",
       "\n",
       "[5 rows x 135 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_performance_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 233
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 41,
     "status": "ok",
     "timestamp": 1530641286091,
     "user": {
      "displayName": "Flavien Prost",
      "photoUrl": "//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg",
      "userId": "100080410554240838905"
     },
     "user_tz": 240
    },
    "id": "Ln2BXOg4Q6GP",
    "outputId": "bb5288e8-9f10-4796-b36e-42f5c02cb148"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tokens</th>\n",
       "      <th>gender</th>\n",
       "      <th>label</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6</th>\n",
       "      <th>...</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31</th>\n",
       "      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[in, her, role, ,, she, is, a, member, of, an,...</td>\n",
       "      <td>F</td>\n",
       "      <td>17</td>\n",
       "      <td>0.001687</td>\n",
       "      <td>1.814099e-11</td>\n",
       "      <td>0.002681</td>\n",
       "      <td>0.009853</td>\n",
       "      <td>0.004227</td>\n",
       "      <td>0.055716</td>\n",
       "      <td>0.003005</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003351</td>\n",
       "      <td>0.013561</td>\n",
       "      <td>0.002040</td>\n",
       "      <td>0.001682</td>\n",
       "      <td>4.412969e-04</td>\n",
       "      <td>6.086852e-17</td>\n",
       "      <td>0.001606</td>\n",
       "      <td>0.001379</td>\n",
       "      <td>0.014635</td>\n",
       "      <td>0.000032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[his, blog, www.donaldhtaylorjr.blogspot.com, ...</td>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "      <td>0.014774</td>\n",
       "      <td>2.716771e-13</td>\n",
       "      <td>0.005496</td>\n",
       "      <td>0.022347</td>\n",
       "      <td>0.003845</td>\n",
       "      <td>0.084480</td>\n",
       "      <td>0.000096</td>\n",
       "      <td>...</td>\n",
       "      <td>0.010309</td>\n",
       "      <td>0.001055</td>\n",
       "      <td>0.001062</td>\n",
       "      <td>0.006205</td>\n",
       "      <td>9.439933e-07</td>\n",
       "      <td>5.250679e-18</td>\n",
       "      <td>0.001204</td>\n",
       "      <td>0.000150</td>\n",
       "      <td>0.015252</td>\n",
       "      <td>0.000779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[he, has, primarily, reported, for, the, atlan...</td>\n",
       "      <td>M</td>\n",
       "      <td>12</td>\n",
       "      <td>0.016779</td>\n",
       "      <td>8.870694e-16</td>\n",
       "      <td>0.001688</td>\n",
       "      <td>0.071343</td>\n",
       "      <td>0.000560</td>\n",
       "      <td>0.029823</td>\n",
       "      <td>0.000032</td>\n",
       "      <td>...</td>\n",
       "      <td>0.018767</td>\n",
       "      <td>0.022292</td>\n",
       "      <td>0.077598</td>\n",
       "      <td>0.033979</td>\n",
       "      <td>8.196229e-05</td>\n",
       "      <td>3.315851e-11</td>\n",
       "      <td>0.007313</td>\n",
       "      <td>0.002565</td>\n",
       "      <td>0.118167</td>\n",
       "      <td>0.001603</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[andrea, 's, area, of, expertise, is, in, whol...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "      <td>0.017742</td>\n",
       "      <td>1.019689e-15</td>\n",
       "      <td>0.017150</td>\n",
       "      <td>0.052085</td>\n",
       "      <td>0.002097</td>\n",
       "      <td>0.052322</td>\n",
       "      <td>0.002627</td>\n",
       "      <td>...</td>\n",
       "      <td>0.001580</td>\n",
       "      <td>0.145462</td>\n",
       "      <td>0.000637</td>\n",
       "      <td>0.000337</td>\n",
       "      <td>3.909138e-04</td>\n",
       "      <td>1.304484e-21</td>\n",
       "      <td>0.011515</td>\n",
       "      <td>0.000922</td>\n",
       "      <td>0.029867</td>\n",
       "      <td>0.000001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[dr., milane, was, trained, as, a, national, c...</td>\n",
       "      <td>F</td>\n",
       "      <td>25</td>\n",
       "      <td>0.015531</td>\n",
       "      <td>1.783027e-12</td>\n",
       "      <td>0.196227</td>\n",
       "      <td>0.016471</td>\n",
       "      <td>0.002690</td>\n",
       "      <td>0.000040</td>\n",
       "      <td>0.001384</td>\n",
       "      <td>...</td>\n",
       "      <td>0.013445</td>\n",
       "      <td>0.003754</td>\n",
       "      <td>0.220090</td>\n",
       "      <td>0.081232</td>\n",
       "      <td>7.920414e-05</td>\n",
       "      <td>2.406181e-13</td>\n",
       "      <td>0.150817</td>\n",
       "      <td>0.014913</td>\n",
       "      <td>0.071632</td>\n",
       "      <td>0.000142</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 135 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              tokens gender  label  \\\n",
       "0  [in, her, role, ,, she, is, a, member, of, an,...      F     17   \n",
       "1  [his, blog, www.donaldhtaylorjr.blogspot.com, ...      M     25   \n",
       "2  [he, has, primarily, reported, for, the, atlan...      M     12   \n",
       "3  [andrea, 's, area, of, expertise, is, in, whol...      F     25   \n",
       "4  [dr., milane, was, trained, as, a, national, c...      F     25   \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0  \\\n",
       "0                                           0.001687                           \n",
       "1                                           0.014774                           \n",
       "2                                           0.016779                           \n",
       "3                                           0.017742                           \n",
       "4                                           0.015531                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1  \\\n",
       "0                                       1.814099e-11                           \n",
       "1                                       2.716771e-13                           \n",
       "2                                       8.870694e-16                           \n",
       "3                                       1.019689e-15                           \n",
       "4                                       1.783027e-12                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2  \\\n",
       "0                                           0.002681                           \n",
       "1                                           0.005496                           \n",
       "2                                           0.001688                           \n",
       "3                                           0.017150                           \n",
       "4                                           0.196227                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3  \\\n",
       "0                                           0.009853                           \n",
       "1                                           0.022347                           \n",
       "2                                           0.071343                           \n",
       "3                                           0.052085                           \n",
       "4                                           0.016471                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4  \\\n",
       "0                                           0.004227                           \n",
       "1                                           0.003845                           \n",
       "2                                           0.000560                           \n",
       "3                                           0.002097                           \n",
       "4                                           0.002690                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5  \\\n",
       "0                                           0.055716                           \n",
       "1                                           0.084480                           \n",
       "2                                           0.029823                           \n",
       "3                                           0.052322                           \n",
       "4                                           0.000040                           \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6  \\\n",
       "0                                           0.003005                           \n",
       "1                                           0.000096                           \n",
       "2                                           0.000032                           \n",
       "3                                           0.002627                           \n",
       "4                                           0.001384                           \n",
       "\n",
       "                                      ...                                      \\\n",
       "0                                     ...                                       \n",
       "1                                     ...                                       \n",
       "2                                     ...                                       \n",
       "3                                     ...                                       \n",
       "4                                     ...                                       \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23  \\\n",
       "0                                           0.003351                            \n",
       "1                                           0.010309                            \n",
       "2                                           0.018767                            \n",
       "3                                           0.001580                            \n",
       "4                                           0.013445                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24  \\\n",
       "0                                           0.013561                            \n",
       "1                                           0.001055                            \n",
       "2                                           0.022292                            \n",
       "3                                           0.145462                            \n",
       "4                                           0.003754                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25  \\\n",
       "0                                           0.002040                            \n",
       "1                                           0.001062                            \n",
       "2                                           0.077598                            \n",
       "3                                           0.000637                            \n",
       "4                                           0.220090                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26  \\\n",
       "0                                           0.001682                            \n",
       "1                                           0.006205                            \n",
       "2                                           0.033979                            \n",
       "3                                           0.000337                            \n",
       "4                                           0.081232                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27  \\\n",
       "0                                       4.412969e-04                            \n",
       "1                                       9.439933e-07                            \n",
       "2                                       8.196229e-05                            \n",
       "3                                       3.909138e-04                            \n",
       "4                                       7.920414e-05                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28  \\\n",
       "0                                       6.086852e-17                            \n",
       "1                                       5.250679e-18                            \n",
       "2                                       3.315851e-11                            \n",
       "3                                       1.304484e-21                            \n",
       "4                                       2.406181e-13                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29  \\\n",
       "0                                           0.001606                            \n",
       "1                                           0.001204                            \n",
       "2                                           0.007313                            \n",
       "3                                           0.011515                            \n",
       "4                                           0.150817                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30  \\\n",
       "0                                           0.001379                            \n",
       "1                                           0.000150                            \n",
       "2                                           0.002565                            \n",
       "3                                           0.000922                            \n",
       "4                                           0.014913                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31  \\\n",
       "0                                           0.014635                            \n",
       "1                                           0.015252                            \n",
       "2                                           0.118167                            \n",
       "3                                           0.029867                            \n",
       "4                                           0.071632                            \n",
       "\n",
       "   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32  \n",
       "0                                           0.000032                           \n",
       "1                                           0.000779                           \n",
       "2                                           0.001603                           \n",
       "3                                           0.000001                           \n",
       "4                                           0.000142                           \n",
       "\n",
       "[5 rows x 135 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_bias_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "8m8QI4qEjtcY"
   },
   "source": [
    "# Part 4: Run evaluation metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "PhwSHsMtO9fF"
   },
   "source": [
    "## Performance metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data Format"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "At this point, our performance data is in the DataFrame `test_performance_df`, with columns:\n",
    "\n",
    "- label: The integer id of the true class for the example.\n",
    "- < model name >_< class id >: One column per model and per class; cells contain the score from that model for that class.\n",
    "\n",
    "You can run the analysis below on any data in this format. Subgroup labels can be generated via words in the text as done above, or come from human labels if you have them."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Run AUC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "XUZYCq-6N8MK"
   },
   "outputs": [],
   "source": [
    "import sklearn.metrics as metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "25    3295\n",
       "3      890\n",
       "22     661\n",
       "12     542\n",
       "26     507\n",
       "23     494\n",
       "17     481\n",
       "31     427\n",
       "30     343\n",
       "7      268\n",
       "2      265\n",
       "18     209\n",
       "16     202\n",
       "24     197\n",
       "29     194\n",
       "10     185\n",
       "6      156\n",
       "0      141\n",
       "8      102\n",
       "5       87\n",
       "20      67\n",
       "4       58\n",
       "32      50\n",
       "19      41\n",
       "9       39\n",
       "11      37\n",
       "27      32\n",
       "21      30\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_performance_df.label.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       False\n",
       "1       False\n",
       "2       False\n",
       "3       False\n",
       "4       False\n",
       "5       False\n",
       "6       False\n",
       "7        True\n",
       "8       False\n",
       "9       False\n",
       "10      False\n",
       "11      False\n",
       "12      False\n",
       "13      False\n",
       "14      False\n",
       "15      False\n",
       "16      False\n",
       "17      False\n",
       "18      False\n",
       "19       True\n",
       "20      False\n",
       "21      False\n",
       "22      False\n",
       "23      False\n",
       "24      False\n",
       "25      False\n",
       "26      False\n",
       "27      False\n",
       "28       True\n",
       "29      False\n",
       "        ...  \n",
       "9970    False\n",
       "9971    False\n",
       "9972    False\n",
       "9973    False\n",
       "9974     True\n",
       "9975    False\n",
       "9976    False\n",
       "9977    False\n",
       "9978    False\n",
       "9979    False\n",
       "9980    False\n",
       "9981    False\n",
       "9982    False\n",
       "9983    False\n",
       "9984    False\n",
       "9985    False\n",
       "9986    False\n",
       "9987    False\n",
       "9988    False\n",
       "9989    False\n",
       "9990    False\n",
       "9991    False\n",
       "9992    False\n",
       "9993    False\n",
       "9994    False\n",
       "9995    False\n",
       "9996    False\n",
       "9997    False\n",
       "9998    False\n",
       "9999    False\n",
       "Name: label, Length: 10000, dtype: bool"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_performance_df['label'] == 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       0.009853\n",
       "1       0.022347\n",
       "2       0.071343\n",
       "3       0.052085\n",
       "4       0.016471\n",
       "5       0.101164\n",
       "6       0.011855\n",
       "7       0.001939\n",
       "8       0.577954\n",
       "9       0.128116\n",
       "10      0.014246\n",
       "11      0.022629\n",
       "12      0.050127\n",
       "13      0.205395\n",
       "14      0.038603\n",
       "15      0.045960\n",
       "16      0.652514\n",
       "17      0.099024\n",
       "18      0.055800\n",
       "19      0.167238\n",
       "20      0.056128\n",
       "21      0.073346\n",
       "22      0.040896\n",
       "23      0.046719\n",
       "24      0.066602\n",
       "25      0.015700\n",
       "26      0.018788\n",
       "27      0.099245\n",
       "28      0.744404\n",
       "29      0.054567\n",
       "          ...   \n",
       "9970    0.025056\n",
       "9971    0.032513\n",
       "9972    0.059166\n",
       "9973    0.030145\n",
       "9974    0.146219\n",
       "9975    0.132243\n",
       "9976    0.061952\n",
       "9977    0.497093\n",
       "9978    0.154263\n",
       "9979    0.033800\n",
       "9980    0.041427\n",
       "9981    0.000079\n",
       "9982    0.071002\n",
       "9983    0.961150\n",
       "9984    0.017224\n",
       "9985    0.113003\n",
       "9986    0.040686\n",
       "9987    0.729384\n",
       "9988    0.025192\n",
       "9989    0.066657\n",
       "9990    0.025502\n",
       "9991    0.011763\n",
       "9992    0.007214\n",
       "9993    0.004737\n",
       "9994    0.044174\n",
       "9995    0.125944\n",
       "9996    0.199613\n",
       "9997    0.018891\n",
       "9998    0.218019\n",
       "9999    0.052486\n",
       "Name: tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3, Length: 10000, dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "_model = 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738'\n",
    "_class = 3\n",
    "test_performance_df['{}_{}'.format(_model, _class)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 32,
     "status": "ok",
     "timestamp": 1530641399913,
     "user": {
      "displayName": "Flavien Prost",
      "photoUrl": "//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg",
      "userId": "100080410554240838905"
     },
     "user_tz": 240
    },
    "id": "yc8SWZbqMwA4",
    "outputId": "6e9399b8-ce22-42bb-c318-959bae73f6c0",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.472880379306\n",
      "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n",
      "Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.494346987625\n",
      "Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.5094779166\n",
      "Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.579115768006\n",
      "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.495869234756\n",
      "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.468048349118\n",
      "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.485770898896\n",
      "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.491489665173\n",
      "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.47350564638\n",
      "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.488175572414\n",
      "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.52613046651\n",
      "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.496119960142\n",
      "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n",
      "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n",
      "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n",
      "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.520060671101\n",
      "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.502598042781\n",
      "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.471809136308\n",
      "Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.589720292223\n",
      "Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.464268809982\n",
      "Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.451838849883\n",
      "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.501252940388\n",
      "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.522887952293\n",
      "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.50126994171\n",
      "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.502592883032\n",
      "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.4976489476\n",
      "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.413984124197\n",
      "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n",
      "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.485232058639\n",
      "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.475149523707\n",
      "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.512695371032\n",
      "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.443107537688\n",
      "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.473124962683\n",
      "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n",
      "Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.502436065161\n",
      "Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.497505395972\n",
      "Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.533997183665\n",
      "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.516225645878\n",
      "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.479381557424\n",
      "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.503250547509\n",
      "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.501472866374\n",
      "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.511796004417\n",
      "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.472370750781\n",
      "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.503774777488\n",
      "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.482292660736\n",
      "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n",
      "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n",
      "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n",
      "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.509781244505\n",
      "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.511501561927\n",
      "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.466850476392\n",
      "Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.616544907291\n",
      "Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.517680398972\n",
      "Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.48543965229\n",
      "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498092928991\n",
      "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.527383088967\n",
      "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.516476102053\n",
      "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498915515\n",
      "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498317975812\n",
      "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.439794843499\n",
      "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n",
      "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.509969175195\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/sklearn/metrics/ranking.py:571: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless\n",
      "  UndefinedMetricWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.493638808206\n",
      "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.508299713945\n",
      "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.457780904523\n",
      "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.496740926496\n",
      "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n",
      "Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499153608357\n",
      "Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499355443456\n",
      "Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.519405656255\n",
      "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.510566062676\n",
      "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.480932677982\n",
      "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.492101760004\n",
      "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.521062880598\n",
      "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.46758254629\n",
      "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.475540747064\n",
      "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.540092938467\n",
      "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.486065994621\n",
      "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n",
      "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n",
      "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n",
      "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.488949553253\n",
      "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.512517147563\n",
      "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.480352770023\n",
      "Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.524139214683\n",
      "Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.531170784555\n",
      "Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.486539618857\n",
      "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.493480481944\n",
      "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.493649014345\n",
      "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.519584546531\n",
      "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.502616827295\n",
      "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499241317853\n",
      "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.527983296549\n",
      "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n",
      "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.513514238074\n",
      "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.505267708646\n",
      "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.526942603747\n",
      "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.416369849246\n",
      "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.492310370551\n",
      "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n",
      "Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.510422808191\n",
      "Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.497258969647\n",
      "Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.533468253803\n",
      "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.51988275004\n",
      "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.446890074912\n",
      "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.470106311844\n",
      "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.480683362454\n",
      "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.519891680117\n",
      "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.498969861354\n",
      "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.49575049304\n",
      "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.496308597575\n",
      "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n",
      "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n",
      "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n",
      "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.497497468669\n",
      "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.498361194233\n",
      "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.454219503411\n",
      "Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.554294558911\n",
      "Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.510198929845\n",
      "Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.443848211301\n",
      "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.511251516464\n",
      "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.527593056506\n",
      "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.517610635095\n",
      "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.507171714086\n",
      "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.494850664384\n",
      "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.433402513042\n",
      "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n",
      "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.529500137723\n",
      "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.485269677036\n",
      "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.513662670014\n",
      "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.429722613065\n"
     ]
    }
   ],
   "source": [
    "auc_list = []\n",
    "for _model in MODEL_NAMES:\n",
    "    for _class in CLASS_NAMES:\n",
    "        fpr, tpr, thresholds = metrics.roc_curve(\n",
    "            test_performance_df['label'] == _class,\n",
    "            test_performance_df['{}_{}'.format(_model, _class)])\n",
    "        _auc = metrics.auc(fpr, tpr)\n",
    "        auc_list.append(_auc)\n",
    "        print ('Auc for class {} model {}: {}'.format(_class, _model, _auc))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_class_from_col_name(col_name):\n",
    "    pattern = r'^.*_(\\d+)$'\n",
    "    return int(re.search(pattern, col_name).group(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_best_class(df, model_name, class_names):\n",
    "    model_class_names = ['{}_{}'.format(model_name, class_name) for class_name in class_names]\n",
    "    sub_df = df[model_class_names]\n",
    "    df['{}_class'.format(model_name)] = sub_df.idxmax(axis=1).apply(get_class_from_col_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _model in MODEL_NAMES:\n",
    "    find_best_class(test_performance_df, _model, CLASS_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.0572\n",
      "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.0639\n",
      "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.0681\n",
      "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.0623\n"
     ]
    }
   ],
   "source": [
    "accuracy_list = []\n",
    "for _model in MODEL_NAMES:\n",
    "    is_correct = (test_performance_df['{}_class'.format(_model)] == test_performance_df['label'])\n",
    "    _acc = sum(is_correct)/len(is_correct)\n",
    "    accuracy_list.append(_acc)\n",
    "    print ('Accuracy for model {}: {}'.format(_model, _acc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "vTrKsfIcxoBh"
   },
   "source": [
    "## Unintended Bias Metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "D3ZJSKY8FHFH"
   },
   "source": [
    "### Data Format\n",
    "At this point, our bias data is in DataFrame df, with columns:\n",
    "\n",
    "*   label: True if the comment is Toxic, False otherwise.\n",
    "*   < model name >: One column per model, cells contain the score from that model.\n",
    "*   < subgroup >: One column per identity, True if the comment mentions this identity.\n",
    "\n",
    "You can run the analysis below on any data in this format. Subgroup labels can be \n",
    "generated via words in the text as done above, or come from human labels if you have them.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'male'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-35-d94e49a61360>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0midentity_terms_civil_included\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_term\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minput_fn_example\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midentity_terms_civil\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m     \u001b[0;32mif\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_bias_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0m_term\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m         \u001b[0;32mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'keeping {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_term\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0midentity_terms_civil_included\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_term\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   2137\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2138\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2139\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2141\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_getitem_column\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   2144\u001b[0m         \u001b[0;31m# get column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2145\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2146\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2148\u001b[0m         \u001b[0;31m# duplicate columns & possible reduce dimensionality\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/generic.pyc\u001b[0m in \u001b[0;36m_get_item_cache\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m   1840\u001b[0m         \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1841\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1842\u001b[0;31m             \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1843\u001b[0m             \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1844\u001b[0m             \u001b[0mcache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/internals.pyc\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, item, fastpath)\u001b[0m\n\u001b[1;32m   3841\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3842\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3843\u001b[0;31m                 \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3844\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3845\u001b[0m                 \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/indexes/base.pyc\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   2525\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2526\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2527\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2529\u001b[0m         \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'male'"
     ]
    }
   ],
   "source": [
    "identity_terms_civil_included = []\n",
    "for _term in input_fn_example.identity_terms_civil:\n",
    "    if sum(test_bias_df[_term]) >= 20:\n",
    "        print ('keeping {}'.format(_term))\n",
    "        identity_terms_civil_included.append(_term)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_bias_df['model_1'] = test_bias_df['tf_gru_attention_civil:v_20181109_164318']\n",
    "test_bias_df['model_2'] = test_bias_df['tf_gru_attention_civil:v_20181109_164403']\n",
    "test_bias_df['model_3'] = test_bias_df['tf_gru_attention_civil:v_20181109_164535']\n",
    "test_bias_df['model_4'] = test_bias_df['tf_gru_attention_civil:v_20181109_164630']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_NAMES = ['model_1', 'model_2', 'model_3', 'model_4']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bias_metrics = model_bias_analysis.compute_bias_metrics_for_models(test_bias_df, identity_terms_civil_included, MODEL_NAMES, 'label')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_bias_analysis.plot_auc_heatmap(bias_metrics, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_bias_analysis.plot_aeg_heatmap(bias_metrics, MODEL_NAMES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "default_view": {},
   "name": "jigsaw-evaluation-pipeline.ipynb",
   "provenance": [],
   "version": "0.3.2",
   "views": {}
  },
  "kernelspec": {
   "display_name": "models_eval",
   "language": "python",
   "name": "models_eval"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: model_evaluation/requirements.txt
================================================
google-api-python-client==1.7.3
Markdown==2.6.11
nltk==3.9
numpy==1.22.0
pandas==0.22.0
requests==2.32.2
seaborn==0.8.1
scikit-learn==0.19.1
scipy==1.10.0
sklearn==0.0
six==1.11.0
tensorflow==2.12.1
jupyter==1.0.0
matplotlib==2.0.2
nltk==3.9


================================================
FILE: model_evaluation/score_bias_data.sh
================================================
#!/bin/bash
#
# Scores the 'biasbios' test data with the models listed below by invoking
# score_test_data.py, which writes the scored data as CSV to OUTPUT_PATH.

# Comma-separated list of models (no spaces), each given as
# <model name>:<version>; passed verbatim to --model_names.
MODEL_NAMES='tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113247,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113241,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113114,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113106,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_163707,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_163723'

# Comma-separated class names passed to --class_names.
CLASS_NAMES='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32'
# Dataset selector; must be a value that score_test_data.py knows how to load.
TEST_DATA='biasbios'
OUTPUT_PATH='gs://conversationai-models/biosbias/scored_data/standard_test.csv'

# Show the command that is about to run.
echo """
Running...

python score_test_data.py \\
 --model_names=$MODEL_NAMES \\
 --class_names=$CLASS_NAMES \\
 --test_data=$TEST_DATA \\
 --output_path=$OUTPUT_PATH
"""

# Expansions are quoted so that each value always reaches the script as a
# single argument, even if it is later edited to contain spaces or glob
# characters.
python score_test_data.py \
 --model_names="$MODEL_NAMES" \
 --class_names="$CLASS_NAMES" \
 --test_data="$TEST_DATA" \
 --output_path="$OUTPUT_PATH"

================================================
FILE: model_evaluation/score_scrubbed_data.sh
================================================
#!/bin/bash
#
# Scores the 'scrubbed_biasbios' test data with the models listed below by
# invoking score_test_data.py, which writes the scored data as CSV to
# OUTPUT_PATH.

# Comma-separated list of models (no spaces), each given as
# <model name>:<version>; passed verbatim to --model_names.
MODEL_NAMES='tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113045,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954'

# Comma-separated class names passed to --class_names.
CLASS_NAMES='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32'
# Dataset selector; must be a value that score_test_data.py knows how to load.
TEST_DATA='scrubbed_biasbios'
OUTPUT_PATH='gs://conversationai-models/biosbias/scored_data/scrubbed_test.csv'

# Show the command that is about to run.
echo """
Running...

python score_test_data.py \\
 --model_names=$MODEL_NAMES \\
 --class_names=$CLASS_NAMES \\
 --test_data=$TEST_DATA \\
 --output_path=$OUTPUT_PATH
"""

# Expansions are quoted so that each value always reaches the script as a
# single argument, even if it is later edited to contain spaces or glob
# characters.
python score_test_data.py \
 --model_names="$MODEL_NAMES" \
 --class_names="$CLASS_NAMES" \
 --test_data="$TEST_DATA" \
 --output_path="$OUTPUT_PATH"

================================================
FILE: model_evaluation/score_test_data.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Convenience script to score some data with CMLE models."""

import getpass
import nltk
import os
import pandas as pd
import random
import tensorflow as tf

import input_fn_example
from utils_export.dataset import Dataset, Model
from utils_export import utils_cloudml
from utils_export import utils_tfrecords

# Command-line flags.
#
# The first four flags default to None and the script defines no fallback
# value for them, so callers are expected to pass them explicitly.
tf.app.flags.DEFINE_string(
    'model_names', None, 'Comma separated list of model names deployed on ML Engine.')
tf.app.flags.DEFINE_string(
    'class_names', None, 'Comma separated list of class names to evaluate.')
tf.app.flags.DEFINE_string('test_data', None,
                           'Test data to evaluate on. Must correspond to one in input_fn_example.py.')
tf.app.flags.DEFINE_string('output_path', None,
                           'Path to write scored test data.')
# The remaining flags carry usable defaults.
tf.app.flags.DEFINE_string('project_name', 'conversationai-models',
                           'Name of GCS project.')
tf.app.flags.DEFINE_string('text_feature_name', 'tokens',
                           'Name of the text feature (see serving function call in run.py).')
tf.app.flags.DEFINE_string('sentence_key', 'comment_key',
                           'Name of input key (see serving function call in run.py).')
tf.app.flags.DEFINE_string('prediction_name', 'probabilities',
                           'Name of output prediction.')
tf.app.flags.DEFINE_integer('dataset_size', 100000,
                            'Maximum size of dataset to score.')

# Handle through which the flag values defined above are read.
FLAGS = tf.app.flags.FLAGS


def get_input_fn(test_data, tokenizer, model_input_comment_field):
  """Builds the input function for one of the supported test datasets.

  Args:
    test_data: dataset selector, either 'biasbios' or 'scrubbed_biasbios'.
    tokenizer: forwarded to input_fn_example.create_input_fn_biasbios.
    model_input_comment_field: forwarded to
      input_fn_example.create_input_fn_biasbios.

  Returns:
    Whatever input_fn_example.create_input_fn_biasbios returns; the scrubbed
    variant is requested with scrubbed=True.

  Raises:
    ValueError: if `test_data` is not one of the supported names.
  """
  supported_datasets = ('biasbios', 'scrubbed_biasbios')
  if test_data not in supported_datasets:
    raise ValueError('Dataset not currently supported.')

  if test_data == 'scrubbed_biasbios':
    return input_fn_example.create_input_fn_biasbios(
        tokenizer, model_input_comment_field, scrubbed=True)
  return input_fn_example.create_input_fn_biasbios(
      tokenizer, model_input_comment_field)


def tokenizer(text, lowercase=True):
  """Converts text to a list of words.

  Args:
    text: piece of text to tokenize. Byte strings are decoded as UTF-8; text
      that is already unicode is used unchanged.
    lowercase: whether to include lowercasing in preprocessing (boolean).

  Returns:
    A list of strings (words).
  """
  # Only byte strings need decoding. Calling .decode() unconditionally fails on
  # a Python 3 `str` (it has no such method) and, on Python 2, forces `unicode`
  # input through an implicit ASCII encode that raises on non-ASCII characters.
  if isinstance(text, bytes):
    text = text.decode('utf-8')
  words = nltk.word_tokenize(text)
  if lowercase:
    words = [w.lower() for w in words]
  return words


def score_data(model_names,
               class_names,
               test_data,
               output_path,
               project_name,
               text_feature_name,
               sentence_key,
               prediction_name,
               dataset_size):
  """Scores a test dataset with ML engine models and writes output as csv.

  Side effects: sets the GCS_READ_CACHE_MAX_SIZE_MB environment variable,
  downloads the nltk 'punkt' resource, seeds the `random` module and writes a
  CSV file to `output_path`.

  Args:
    model_names: list of model names deployed on ML Engine.
    class_names: list of class names to evaluate.
    test_data: test data to evaluate on, must be defined in get_input_fn.
    output_path: path to write scored test data.
    project_name: name of Google Cloud project.
    text_feature_name: name of the text feature (see serving function call in run.py).
    sentence_key: name of input key (see serving function call in run.py).
    prediction_name: name of output prediction.
    dataset_size: maximum size of dataset to score.

  Raises:
    ValueError: if `test_data` is not supported by get_input_fn.
  """
  os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0' #Faster to access GCS file + https://github.com/tensorflow/tensorflow/issues/15530
  nltk.download('punkt')

  # Load data.
  input_fn = get_input_fn(test_data,
    tokenizer,
    model_input_comment_field=text_feature_name,
    )
  # Per-user directory handed to Dataset (presumably where it stores its
  # tf-records -- TODO confirm against utils_export.dataset).
  performance_dataset_dir = os.path.join(
      'gs://conversationai-models/',
      getpass.getuser(),
      'tfrecords',
      'performance_dataset_dir_3')

  dataset = Dataset(input_fn, performance_dataset_dir)
  random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future

  # Define and call model.
  model_input_spec = {
      text_feature_name: utils_tfrecords.EncodingFeatureSpec.LIST_STRING} #library will use this automatically
  dataset.load_data(dataset_size, random_filter_keep_rate=0.5)
  model = Model(
      feature_keys_spec=model_input_spec,
      prediction_keys=prediction_name,
      example_key=sentence_key,
      model_names=model_names,
      project_name=project_name)
  dataset.add_model_prediction_to_data(model, recompute_predictions=True, class_names=class_names)

  # Save data.
  scored_test_df = dataset.show_data()
  # Close the output handle deterministically so the CSV is flushed to its
  # destination even if garbage collection of the file object is delayed.
  with tf.gfile.Open(output_path, 'w') as output_file:
    scored_test_df.to_csv(output_file, index=False)

if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)

  # These flags default to None. Fail fast with a clear message instead of
  # crashing on `None.split` below, or only discovering a missing output path
  # after the whole dataset has already been scored.
  required_flags = ('model_names', 'class_names', 'test_data', 'output_path')
  missing_flags = [
      flag_name for flag_name in required_flags
      if getattr(FLAGS, flag_name) is None]
  if missing_flags:
    raise ValueError('Missing required flag(s): {}'.format(
        ', '.join('--' + flag_name for flag_name in missing_flags)))

  # Both list flags are comma separated; surrounding whitespace is dropped.
  model_names = [name.strip() for name in FLAGS.model_names.split(',')]
  print(model_names)
  class_names = [name.strip() for name in FLAGS.class_names.split(',')]
  print(class_names)
  score_data(model_names,
             class_names,
             FLAGS.test_data,
             FLAGS.output_path,
             FLAGS.project_name,
             FLAGS.text_feature_name,
             FLAGS.sentence_key,
             FLAGS.prediction_name,
             FLAGS.dataset_size)


================================================
FILE: model_evaluation/utils_export/__init__.py
================================================


================================================
FILE: model_evaluation/utils_export/dataset.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines the dataset structure for evaluation pipeline."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import inspect
import os

import pandas as pd
import tensorflow as tf
from tensorflow.python.platform import tf_logging as logging

import utils_export.utils_cloudml as utils_cloudml
import utils_export.utils_tfrecords as utils_tfrecords

# Quota for concurrent prediction jobs
CMLE_QUOTA_PREDICTION = 7


class Model(object):
  """Defines the spec of a CMLE Model.

  All models (given by `model_names`) need to share the same
  `feature_keys_spec`, `example_key` and `prediction_keys`. Those fields
  define the inputs (`feature_keys_spec`, `example_key`) and the output
  (`prediction_keys`) of the models.
  """

  def __init__(self,
               feature_keys_spec,
               prediction_keys,
               model_names,
               project_name,
               example_key='example_key'):
    """Initializes a model and defines its signature.

    Args:
      feature_keys_spec: spec of the tf_records input to the model.
      prediction_keys: Name of the keys to extract from model outputs.
      model_names: List of names of the model in Cloud MLE.
        Format should be $MODEL_NAME:$VERSION. If no version given, will take
          default version.
      project_name: name of the gcp project.
      example_key: name of the example key expected by the model.

    Raises:
      ValueError: If example_key is included in the feature_spec
        or if feature_keys_spec does not match required format (the format
        check is delegated to `utils_tfrecords.is_valid_spec`).

    Note: When used with `Dataset`, the dataframe returned by the input_fn
      should not contain the `example_key`, as it will be later created by the
      API.
    """

    utils_tfrecords.is_valid_spec(feature_keys_spec)
    if example_key in feature_keys_spec:
      raise ValueError('example_key should not be part of input_data.'
                       ' It will be created when writing to tf-records')
    self._model_name = model_names
    self._feature_keys_spec = feature_keys_spec
    self._prediction_keys = prediction_keys
    self._project_name = project_name
    self._example_key = example_key
    # No prediction job has been started yet.
    self._job_ids_prediction = None

  def feature_keys_spec(self):
    """Returns the spec of the tf-records fed to the model."""
    return self._feature_keys_spec

  def example_key(self):
    """Returns the name of the example key expected by the model."""
    return self._example_key

  def model_names(self):
    """Returns the model names, as given to the constructor."""
    return self._model_name

  def prediction_keys(self):
    """Returns the name of the key(s) to extract from the model outputs."""
    return self._prediction_keys

  def project_name(self):
    """Returns the name of the gcp project."""
    return self._project_name

  def set_job_ids_prediction(self, job_ids):
    """Records the ids of the batch prediction jobs started for this model."""
    self._job_ids_prediction = job_ids

  def job_ids_prediction(self):
    """Returns the ids of the batch prediction jobs started for this model.

    Raises:
      ValueError: if no job ids were recorded with `set_job_ids_prediction`.
    """
    # `getattr` keeps this safe for instances built without `__init__`.
    job_ids = getattr(self, '_job_ids_prediction', None)
    if job_ids is None:
      raise ValueError(
          'Model does not have any `job_ids_prediction`.'
          ' You need to run `call_prediction` for CMLE batch prediction job.')
    return job_ids


class Dataset(object):
  """Defines a format for every dataset to work with evaluation pipeline.

  Usage:

  input_fn = ... (returns pandas DataFrame).
  dataset = Dataset(input_fn, dataset_dir) # Verifies that input_fn is ok.

  dataset.load_data(10000)

  model = Model(...)
  # Next function verifies that models are compatible.
  dataset.add_model_prediction_to_data(model)

  dataset.show_data()
  """

  def __init__(self, input_fn, dataset_dir):
    """Initialises a `Dataset` instance.

    Args:
      input_fn: function that returns a pandas `Dataframe`. It must accept a
        `max_n_examples` argument; it is called once here with
        `max_n_examples=1` as a sanity check.
      dataset_dir: Directory where to save the temporary files, in particular
        tf_records inputs and outputs of CMLE.

    Raises:
      ValueError: if `input_fn` does not meet the requirements checked by
        `check_input_fn`.
    """
    self.check_input_fn(input_fn)
    self._input_fn = input_fn
    self._dataset_dir = dataset_dir

  def show_data(self):
    """Returns the loaded DataFrame.

    Raises:
      ValueError: if `load_data` has not been called yet.
    """
    if not hasattr(self, 'data'):
      raise ValueError('Dataset does not have data yet.'
                       ' You need to run `load_data` first.')
    return self.data

  def check_input_fn(self, input_fn):
    """Checks if the input_fn meets requirements.

    Args:
      input_fn: the candidate function.

    Raises:
      ValueError: if `input_fn` has no `max_n_examples` argument, does not
        return a pandas DataFrame, or does not return exactly one row when
        called with `max_n_examples=1`.
    """
    # `inspect.getargspec` was removed in Python 3.11 and rejects functions
    # with keyword-only parameters; prefer `getfullargspec` when available.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    spec = get_spec(input_fn)
    accepted_args = list(spec.args)
    accepted_args.extend(getattr(spec, 'kwonlyargs', None) or [])
    if 'max_n_examples' not in accepted_args:
      raise ValueError('input_fn should have (at least) `max_n_examples`'
                       ' as arguments.')

    loaded_data = input_fn(max_n_examples=1)

    if not isinstance(loaded_data, pd.DataFrame):
      raise ValueError('input_fn should return a pandas DataFrame.')

    if len(loaded_data) != 1:
      raise ValueError(
          'input_fn(max_n_examples=1) should contain 1 row (exactly).')
    logging.info('input_fn is compatible with the `Dataset` class.')

  def check_compatibility(self, model):
    """Checks that input_fn is compatible with the model.

    Args:
      model: a `Model` instance.

    Raises:
      ValueError: if a feature key of the model is not a column of the data.
    """

    # Use the loaded data when there is some, otherwise sample a single row.
    if hasattr(self, 'data'):
      test_df = self.data
    else:
      test_df = self._input_fn(max_n_examples=1)

    for key in model.feature_keys_spec():
      if key not in test_df.columns:
        raise ValueError(
            'input_fn must contain at least the feature keys {}'.format(
                model.feature_keys_spec()))
    logging.info('Model is compatible with the `Dataset` instance.')

  def load_data(self, max_n_examples, **kwargs):
    """Calls input_fn and stores its result in `self.data`.

    Args:
      max_n_examples: forwarded to input_fn as `max_n_examples`.
      **kwargs: extra keyword arguments forwarded to input_fn.
    """
    self.data = self._input_fn(max_n_examples=max_n_examples, **kwargs)

  def get_path_input_tf(self):
    """Returns the path to input tf-records (input of CMLE)."""
    return os.path.join(self._dataset_dir, 'input_data.tfrecords')

  def get_path_prediction(self, model_name):
    """Returns the path to prediction files (output of CMLE)."""
    return os.path.join(self._dataset_dir,
                        'prediction_data_{}'.format(model_name))

  def convert_data_to_tf(self, feature_keys_spec, example_key, overwrite=True):
    """Writes self.data to tf-records.

    Args:
      feature_keys_spec: the spec of the feature_keys. Only those fields will be
        written to tf-records.
      example_key: Name of the field for example_key. The key will be generated
        on the fly.
      overwrite: Whether to overwrite the existing tf_records.

    Raises:
      ValueError: if dataset does not have data loaded.
    """

    if not hasattr(self, 'data'):
      raise ValueError('Dataset does not have data yet.'
                       ' You need to run `load_data` first.')

    path_input_tf = self.get_path_input_tf()
    if tf.gfile.Exists(path_input_tf):
      if overwrite:
        logging.info('TF-Records already exist - overwriting them.')
      else:
        logging.info('TF-Records already exist - We will use those.')
        return

    utils_tfrecords.encode_pandas_to_tfrecords(self.data, feature_keys_spec,
                                               path_input_tf, example_key)

  def call_prediction(self, model):
    """Starts a CMLE batch prediction job for each name of the model.

    The ids of the started jobs are stored on the model with
    `set_job_ids_prediction`.

    Args:
      model: a `Model` instance with at most CMLE_QUOTA_PREDICTION names.

    Raises:
      ValueError: if the input tf-records do not exist, if the model has more
        names than the quota, or if a job could not be started.
    """

    path_input_tf = self.get_path_input_tf()
    if not tf.gfile.Exists(path_input_tf):
      raise ValueError('Dataset does not have input_tf_records yet.'
                       ' You need to run `convert_data_to_tf` first.')

    if len(model.model_names()) > CMLE_QUOTA_PREDICTION:
      raise ValueError('Model should not contain more than {} versions.'
                       ' If you need more, split the version into two'
                       ' different models.'.format(CMLE_QUOTA_PREDICTION))

    job_ids = []
    for model_name_full in model.model_names():
      # Names follow $MODEL_NAME[:$VERSION]; no version means default version.
      name_parts = model_name_full.split(':')
      model_name = name_parts[0]
      version = name_parts[1] if len(name_parts) > 1 else None

      job_id = utils_cloudml.call_model_predictions_from_df(
          project_name=model.project_name(),
          input_tf_records=path_input_tf,
          output_prediction_path=self.get_path_prediction(model_name_full),
          model_name=model_name,
          version_name=version)
      # A missing id means the job was not started; waiting on it later would
      # only fail with a less helpful error.
      if job_id is None:
        raise ValueError(
            'The batch prediction job for model {} could not be started.'
            ' Check the logs for details.'.format(model_name_full))
      job_ids.append(job_id)
    model.set_job_ids_prediction(job_ids)

  def collect_prediction(self, model, class_names=None):
    """Collects the predictions of CMLE jobs and adds it to dataframe.

    Args:
      model: a `Model` instance whose prediction files have been written.
      class_names (optional): class names of a multiclass model, forwarded to
        `utils_cloudml.add_model_predictions_to_df`.
    """

    for model_name in model.model_names():
      self.data = utils_cloudml.add_model_predictions_to_df(
          self.data,
          prediction_file=self.get_path_prediction(model_name),
          model_col_name=model_name,
          prediction_name=model.prediction_keys(),
          example_key=model.example_key(),
          class_names=class_names)

  def wait_predictions(self, model):
    """Loops until the prediction jobs of the model completed.

    Args:
      model: a `Model` instance on which `call_prediction` was run.

    Raises:
      ValueError: if the model has no recorded prediction job ids.
    """

    # `hasattr(model, 'job_ids_prediction')` is always true (it is a method),
    # so the check has to look at the recorded ids themselves.
    try:
      job_ids = model.job_ids_prediction()
    except AttributeError:
      job_ids = None
    if job_ids is None:
      raise ValueError(
          'Model does not have any `job_ids_prediction`.'
          ' You need to run `call_prediction` for CMLE batch prediction job.')

    for job_id in job_ids:
      utils_cloudml.check_job_over(model.project_name(), job_id)

  def add_model_prediction_to_data(self, model, recompute_predictions=True, class_names=None):
    """Computes the prediction of the model and adds it to dataframe.

    Args:
      model: a `Model` instance.
      recompute_predictions: Indicates if we run predictions (batch prediction
        job) or if we load past prediction files. If use past predictions (when
        False), the data must match exactly (same  number of lines and in same
        order).
      class_names (optional): If the model is a multiclass model, you can specify class names.
          The model will then return a logit value per class instead of a single value.

    Raises:
      ValueError: if the model is not compatible with the data.
    """
    self.check_compatibility(model)

    if recompute_predictions:
      # The tf-records are shared by every batch, so write them only once.
      self.convert_data_to_tf(model.feature_keys_spec(), model.example_key())

      all_model_names = model.model_names()
      # Ceiling division: no empty trailing batch when the number of models is
      # an exact multiple of the quota.
      num_batches = ((len(all_model_names) + CMLE_QUOTA_PREDICTION - 1)
                     // CMLE_QUOTA_PREDICTION)
      batch_starts = range(0, len(all_model_names), CMLE_QUOTA_PREDICTION)
      for batch_index, min_index in enumerate(batch_starts):
        logging.info('Doing batch {}/{}'.format(batch_index + 1, num_batches))
        max_index = min_index + CMLE_QUOTA_PREDICTION
        # Each batch is wrapped in its own `Model` so it stays under the quota
        # of concurrent prediction jobs.
        sub_model = Model(
            feature_keys_spec=model.feature_keys_spec(),
            prediction_keys=model.prediction_keys(),
            model_names=all_model_names[min_index:max_index],
            project_name=model.project_name(),
            example_key=model.example_key())
        self.call_prediction(sub_model)
        self.wait_predictions(sub_model)

    else:
      logging.warning(
          'Using past predictions. '
          'the data must match exactly (same number of lines and same order).')

    self.collect_prediction(model, class_names)


================================================
FILE: model_evaluation/utils_export/dataset_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import getpass
import os
import time
import unittest

from dataset import Dataset
from dataset import Model
import pandas as pd
from utils_tfrecords import EncodingFeatureSpec


class TestCompatibleInputFn(unittest.TestCase):
  """Verifies the compatibility of input_fn with `Dataset`."""

  def testCorrect(self):
    """A well-formed input_fn is accepted by the `Dataset` constructor."""

    def input_fn(max_n_examples):
      return pd.DataFrame({
          'comment_text': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      })

    try:
      Dataset(input_fn, 'dataset_dir')
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')

  def testWrongArgInputFn(self):
    """An input_fn without a `max_n_examples` argument is rejected."""

    def input_fn(other_args=1.0):
      assert other_args
      return {'other_feature': ['This is one'], 'label_name': [0]}

    with self.assertRaises(ValueError) as context:
      Dataset(input_fn, 'dataset_dir')
    # The message is checked after the `with` block: a statement placed after
    # the raising call inside the block would never run.
    self.assertIn('input_fn should have (at least) `max_n_examples`',
                  str(context.exception))

  def testInputFnWrongType(self):
    """An input_fn that does not return a DataFrame is rejected."""

    def input_fn(max_n_examples):
      return {
          'other_feature': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      }

    with self.assertRaises(ValueError) as context:
      Dataset(input_fn, 'dataset_dir')
    self.assertIn('input_fn should return a pandas DataFrame.',
                  str(context.exception))

  def testWrongNumberOfLines(self):
    """An input_fn that ignores `max_n_examples` is rejected."""

    def input_fn(max_n_examples=1):
      assert max_n_examples
      return pd.DataFrame({
          'comment_text': ['This is one'] * 2,
          'label_name': [0] * 2
      })

    with self.assertRaises(ValueError) as context:
      Dataset(input_fn, 'dataset_dir')
    self.assertIn(
        'input_fn(max_n_examples=1) should contain 1 row (exactly).',
        str(context.exception))


class TestModelCompatibleWithInputFn(unittest.TestCase):
  """Verifies the compatibility between input_fn and model."""

  def testBadTypeFeatureKeys(self):
    """A feature_keys_spec that is not a dictionary is rejected by `Model`."""

    with self.assertRaises(Exception) as context:
      Model(
          feature_keys_spec='comment_text',
          prediction_keys='prediction_key',
          model_names='None',
          project_name=None)
    # Checked after the `with` block so that the assertion actually runs.
    # NOTE(review): the expected text is produced by
    # `utils_tfrecords.is_valid_spec` - confirm it still matches.
    self.assertIn('Spec should be a dictionary', str(context.exception))

  def testInputFnMissingFeatureKeys(self):
    """A dataset lacking one of the model feature keys is incompatible."""

    model = Model(
        feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},
        prediction_keys='prediction_key',
        model_names='None',
        project_name=None)

    def input_fn(max_n_examples):
      return pd.DataFrame({
          'other_feature': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      })

    # Built outside the raising context: only the compatibility check is
    # expected to fail, not the construction of the dataset.
    dataset = Dataset(input_fn, 'dataset_dir')
    with self.assertRaises(ValueError) as context:
      dataset.check_compatibility(model)
    self.assertIn('input_fn must contain at least the feature keys',
                  str(context.exception))

  def testModelIsCompatibleWithDataset(self):
    """A dataset providing every feature key of the model is compatible."""
    model = Model(
        feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},
        prediction_keys='prediction_key',
        model_names='None',
        project_name=None)

    def input_fn(max_n_examples):
      return pd.DataFrame({
          'comment_text': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      })

    try:
      dataset = Dataset(input_fn, 'dataset_dir')
      dataset.check_compatibility(model)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')


class TestEndPipeline(unittest.TestCase):
  """Verifies end-to-end use of dataset."""

  # Evaluated once, when the class is defined: every test of a run shares the
  # same output directory.
  test_version = str(int(time.time()))

  def setUp(self):
    """Builds a 5-row dataset and a two-version model spec."""

    def input_fn_test(max_n_examples):
      tokenized_comment = ['This', 'is', 'one']
      return pd.DataFrame(
          {'comment_text': [tokenized_comment] * max_n_examples})

    path_components = (
        'gs://kaggle-model-experiments/',
        getpass.getuser(),
        'unittest',
        'dataset_test',
        TestEndPipeline.test_version,
    )
    self.dataset = Dataset(input_fn_test, os.path.join(*path_components))
    self.dataset.load_data(5)

    versions_under_test = [
        'tf_gru_attention:v_20180914_163804',
        'tf_gru_attention:v_20180823_133625'
    ]
    self.model = Model(
        feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},
        prediction_keys='frac_neg/logistic',
        example_key='comment_key',
        model_names=versions_under_test,
        project_name='wikidetox')

  def _add_predictions_or_fail(self, **kwargs):
    """Adds the model predictions, failing the test on any ValueError."""
    try:
      self.dataset.add_model_prediction_to_data(self.model, **kwargs)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')

  def testComputePredictions(self):
    """Runs the batch prediction jobs and collects their outputs."""
    self._add_predictions_or_fail()

  def testLoadPredictions(self):
    """Collects predictions from existing files, without running any job."""
    self._add_predictions_or_fail(recompute_predictions=False)


# Runs every test case of this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()


================================================
FILE: model_evaluation/utils_export/deploy_list_models.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Deploys all models that have been saved in a list of directories."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import datetime
import os
import sys
import time

from googleapiclient import discovery
from googleapiclient import errors
import tensorflow as tf
from tensorflow.python.lib.io import file_io
from tensorflow.python.platform import tf_logging as logging

# Maximum number of versions that can be created concurrently.
CLOUD_ML_VERSION_CREATE_QUOTA = 10


def get_list_models_to_export(parent_model_dir):
  """Lists the directories under parent_model_dir that hold a saved model.

  Args:
    parent_model_dir: root directory, walked with `tf.gfile.Walk`.

  Returns:
    The list of visited directories that contain a `saved_model.pb` file, in
    the order in which the walk yields them.
  """
  return [
      directory
      for directory, _, filenames in tf.gfile.Walk(parent_model_dir)
      # A `saved_model.pb` file is the indicator of a saved model.
      if 'saved_model.pb' in filenames
  ]


def check_model_exists(project_name, model_name):
  """Verifies if a model name is deployed already on CMLE.

  Args:
    project_name: name of the gcp project.
    model_name: name of the model to look up.

  Returns:
    True if the `get` request on the model succeeds, False if the service
    answers with an HTTP error (whatever its status code).
  """
  ml = discovery.build('ml', 'v1')

  model_id = 'projects/{}/models/{}'.format(project_name, model_name)
  request = ml.projects().models().get(name=model_id)
  try:
    request.execute()
  except errors.HttpError:
    # Only API errors mean "not found"; anything else (keyboard interrupt,
    # programming error, ...) must propagate instead of being swallowed.
    return False
  return True


def create_model(project_name, model_name):
  """Creates a model on CMLE.

  Args:
    project_name: name of the gcp project that will own the model.
    model_name: name of the model to create.

  Raises:
    ValueError: if the creation request fails with an HTTP error.
  """
  ml = discovery.build('ml', 'v1')

  creation_request = ml.projects().models().create(
      parent='projects/{}'.format(project_name),
      body={'name': model_name})
  try:
    creation_request.execute()
  except errors.HttpError as err:
    reason = err._get_reason()
    raise ValueError('There was an error creating the model.' +
                     ' Check the details: {}'.format(reason))


def create_version(project_name, model_name, version_name, model_dir,
                   runtime_version='1.10'):
  """Creates a version of a model on CMLE.

  Args:
    project_name: name of the gcp project.
    model_name: name of the (existing) model that receives the version.
    version_name: name of the version to create.
    model_dir: location of the exported model, used as `deploymentUri`.
    runtime_version: CMLE runtime version used to serve the model.

  Returns:
    The name of the operation that tracks the creation of the version.

  Raises:
    ValueError: if the creation request fails with an HTTP error.
  """

  ml = discovery.build('ml', 'v1')
  request_dict = {
      'name': version_name,
      'deploymentUri': model_dir,
      'runtimeVersion': runtime_version
  }
  model_id = 'projects/{}/models/{}'.format(project_name, model_name)
  request = ml.projects().models().versions().create(
      parent=model_id, body=request_dict)

  try:
    response = request.execute()
  except errors.HttpError as err:
    # The `{}` placeholder was missing, which silently dropped the reason.
    raise ValueError('There was an error creating the version.' +
                     ' Check the details: {}'.format(err._get_reason()))
  return response['name']


def check_version_deployed(operation_id):
  """Loops until the version has been deployed on CMLE.

  Args:
    operation_id: name of the operation returned when the version was created.

  Raises:
    ValueError: if the operation cannot be retrieved, or if it finishes with
      an error.
  """

  ml = discovery.build('ml', 'v1')
  request = ml.projects().operations().get(name=operation_id)

  while True:
    time.sleep(0.3)
    try:
      response = request.execute()
    except errors.HttpError as err:
      raise ValueError('There was an error getting the operation.' +
                       ' Check the details: {}'.format(err._get_reason()))
    if response.get('done', False):
      break

  # A finished operation is not necessarily a successful one: failures are
  # reported through the `error` field of the operation.
  operation_error = response.get('error')
  if operation_error:
    raise ValueError('The operation {} finished with an error.'
                     ' Check the details: {}'.format(operation_id,
                                                     operation_error))


def deploy_model_version(project_name, model_name, version_name, model_dir):
  """Deploys one TF model on CMLE.

  Args:
    project_name: Name of a CMLE project.
    model_name: Name of the model to deploy. If it does not exist yet, the model
      will be created.
    version_name: Version of the model on CMLE.
    model_dir: Where to find the exported model.

  Returns:
    The value returned by `create_version` for the new version.
  """
  model_already_exists = check_model_exists(project_name, model_name)
  if not model_already_exists:
    create_model(project_name, model_name)
  return create_version(project_name, model_name, version_name, model_dir)


def _get_version_name(model_dir, go_up_3=True):
  """Looks for the version_name in the model_directory name.
  
  Example: model_dir = gs://.../20190328_103329/model_dir/102500/1553798665/
    If go_up_3, it will grab '20190328_103329'
    if not, it will grab '1553798665'.
  Typically speaking, set up go_up_3=False if a model_run has several exported models."""
  if go_up_3:
    name = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(model_dir))))
    return 'v_{}'.format(os.path.basename(name))
  else:
    return 'v_{}'.format(os.path.basename(os.path.dirname(model_dir)))


def deploy_all_models(list_model_dir, project_name, model_name):
  """Finds and deploys all models present a list of directories.

  Args:
    list_model_dir: Directories to explore, either as a list/tuple of paths or
      as a single comma-separated string.
    project_name: Name of the project.
    model_name: Name of the model. All the model found in the parent_dir will be
      saved within the same main model.
  """
  # Use the argument rather than the command-line globals, so that the function
  # also works when this module is imported.
  if isinstance(list_model_dir, (list, tuple)):
    raw_dirs = list_model_dir
  else:
    raw_dirs = list_model_dir.split(',')
  model_dirs = [entry.strip() for entry in raw_dirs if entry.strip()]

  models = []
  for _model_dir in model_dirs:
    models.extend(get_list_models_to_export(_model_dir))
  logging.info('Exploration finished: {} models detected.'.format(
      len(models)))

  if not models:
    logging.info('No saved model found: nothing to deploy.')
    return

  # Versions are created by batches, to stay under the quota of concurrent
  # version creations; each batch is awaited before the next one starts.
  for batch_start in range(0, len(models), CLOUD_ML_VERSION_CREATE_QUOTA):
    batch = models[batch_start:batch_start + CLOUD_ML_VERSION_CREATE_QUOTA]
    operation_id_list = [
        deploy_model_version(
            project_name=project_name,
            model_name=model_name,
            version_name=_get_version_name(model_dir),
            model_dir=model_dir)
        for model_dir in batch
    ]

    logging.info('Waiting for versions to be deployed...')
    for operation_id in operation_id_list:
      check_version_deployed(operation_id)

  logging.info('DONE. {} models have been deployed'.format(len(models)))


if __name__ == '__main__':

  # Command-line flags of the script, as (flag, argparse options) pairs.
  flag_definitions = (
      ('--list_model_dir', {
          'help': 'List of the model directory (comma separated).',
          'required': True,
      }),
      ('--project_name', {
          'help': 'Name of GCP project.',
          'default': 'conversationai-models',
      }),
      ('--model_name', {
          'help': 'Name of the model on CMLE.',
          'default': 'tf_test',
      }),
  )

  parser = argparse.ArgumentParser()
  for flag_name, flag_options in flag_definitions:
    parser.add_argument(flag_name, **flag_options)
  args = parser.parse_args(args=sys.argv[1:])

  tf.logging.set_verbosity(tf.logging.INFO)

  deploy_all_models(args.list_model_dir, args.project_name, args.model_name)


================================================
FILE: model_evaluation/utils_export/utils_cloudml.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines some utilities to use cloud MLE batch prediction jobs."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import json
import os
import re
import time

import googleapiclient.discovery as discovery
import googleapiclient.errors as errors
import tensorflow as tf
from tensorflow.python.lib.io import file_io
from tensorflow.python.platform import tf_logging as logging


def call_model_predictions_from_df(project_name,
                                   input_tf_records,
                                   output_prediction_path,
                                   model_name,
                                   version_name=None):
  """Starts a batch prediction job on existing tf-records.

  Args:
    project_name: gcp project name.
    input_tf_records: gcs path to input tf_records.
    output_prediction_path: gcs path to store tf_records, which will be outputs
      to batch prediction job.
    model_name: Model name used to run predictions. The model must take as
      inputs TF-Records with fields $TEXT_FEATURE_NAME and $SENTENCE_KEY, and
      should return a dictionary including the field $LABEL_NAME.
    version_name: Model version to run predictions. If None, it will use default
      version of the model.

  Returns:
    job_id: the job_id of the prediction job.

  Raises:
    ValueError: if input_tf_records does not exist.
  """

  # The tf-records are not created here: they have to exist already.
  records_are_present = file_io.file_exists(input_tf_records)
  if not records_are_present:
    raise ValueError('tf_records do not exist.')

  # Delegate the submission of the job and hand its id back to the caller.
  return _call_batch_job(
      project_name,
      input_paths=input_tf_records,
      output_path=output_prediction_path,
      model_name=model_name,
      version_name=version_name)


def _call_batch_job(project_name,
                    input_paths,
                    output_path,
                    model_name,
                    version_name=None):
  """Calls a batch prediction job on Cloud MLE.

  Args:
    project_name: gcp project name.
    input_paths: gcs path(s) of the input tf-records.
    output_path: gcs path where the job writes its outputs.
    model_name: name of the model used to run predictions.
    version_name: version of the model; None selects the default version.

  Returns:
    The id of the created job, or None if the creation request failed with an
    HTTP error (the failure is logged). Callers have to check for None.
  """

  batch_predict_body = _make_batch_job_body(
      project_name,
      input_paths,
      output_path,
      model_name,
      version_name=version_name)

  project_id = 'projects/{}'.format(project_name)

  ml = discovery.build('ml', 'v1')
  request = ml.projects().jobs().create(
      parent=project_id, body=batch_predict_body)

  try:
    response = request.execute()
  except errors.HttpError as err:
    # Something went wrong: report it as an error (not as mere information)
    # and make the absence of a job id explicit.
    logging.error('There was an error creating the prediction job.'
                  ' Check the details: {}'.format(err._get_reason()))
    return None

  logging.info('state : {}'.format(response['state']))
  return response['jobId']


def _make_batch_job_body(project_name,
                         input_paths,
                         output_path,
                         model_name,
                         region='us-central1',
                         data_format='TF_RECORD',
                         version_name=None,
                         max_worker_count=None,
                         runtime_version=None):
  """Creates the request body for Cloud MLE batch prediction job."""

  project_id = 'projects/{}'.format(project_name)
  model_id = '{}/models/{}'.format(project_id, model_name)
  if version_name:
    version_id = '{}/versions/{}'.format(model_id, version_name)

  # Make a jobName of the format "model_name_batch_predict_YYYYMMDD_HHMMSS"
  timestamp = time.strftime('%Y%m%d_%H%M%S', time.gmtime())

  # Make sure the project name is formatted correctly to work as the basis
  # of a valid job name.
  clean_project_name = re.sub(r'\W+', '_', project_name)

  job_id = '{}_{}_{}'.format(clean_project_name, model_name, timestamp)

  # Start building the request dictionary with required information.
  body = {
      'jobId': job_id,
      'predictionInput': {
          'dataFormat': data_format,
          'inputPaths': input_paths,
          'outputPath': output_path,
          'region': region
      }
  }

  # Use the version if present, the model (its default version) if not.
  if version_name:
    body['predictionInput']['versionName'] = version_id
  else:
    body['predictionInput']['modelName'] = model_id

  # Only include a maximum number of workers or a runtime version if specified.
  # Otherwise let the service use its defaults.
  if max_worker_count:
    body['predictionInput']['maxWorkerCount'] = max_worker_count

  if runtime_version:
    body['predictionInput']['runtimeVersion'] = runtime_version

  return body


def check_job_over(project_name, job_name):
  """Sleeps until the batch job is over.

  The state of the job is polled every 30 seconds; a progress message is
  logged on every fifth poll.

  Args:
    project_name: gcp project name.
    job_name: id of the job to wait for.

  Raises:
    ValueError: if the job ends in the FAILED or CANCELLED state.
  """

  ml = discovery.build('ml', 'v1')
  request = ml.projects().jobs().get(
      name='projects/{}/jobs/{}'.format(project_name, job_name))
  # States from which the job can never reach SUCCEEDED: waiting any longer
  # would loop forever.
  unsuccessful_final_states = ('FAILED', 'CANCELLED')

  k = 0
  start_time = datetime.datetime.now()
  while True:
    response = request.execute()
    state = response['state']
    if state == 'SUCCEEDED':
      break
    if state in unsuccessful_final_states:
      raise ValueError(
          'Prediction job {} ended in state {}. Details: {}'.format(
              job_name, state, response.get('errorMessage', 'none given')))

    if not (k % 5):
      time_spent = int(
          (datetime.datetime.now() - start_time).total_seconds() / 60)
      logging.info(
          'Waiting for prediction job to complete. Minutes elapsed: {}'
          .format(time_spent))
    time.sleep(30)
    k += 1

  logging.info('Prediction job completed.')


def add_model_predictions_to_df(df, prediction_file, model_col_name,
                                prediction_name, example_key,
                                class_names=None):
  """Loads the prediction files and adds the model scores to a DataFrame.

  This function reads the prediction file and extracts the fields
  `prediction_name` and `example_key`. It orders the results based on
  `example_key` and then adds them to `df` (which is modified in place) in a
  new column called `model_col_name`, or in one column per class, called
  `<model_col_name>_<class_name>`, when `class_names` is given.

  Args:
    df: a pandas `DataFrame`.
    prediction_file: Path to the directory of the prediction files (outputs of
      CMLE prediction job). Only the file
      `prediction.results-00000-of-00001` of this directory is read.
    model_col_name: Column name of the prediction values in df (added column).
    prediction_name: Name of the column to retrieve from CMLE predictions.
    example_key: key identifier of an example.
    class_names (optional): If the model is a multiclass model, you can specify
      class names. The model will then return a logit value per class instead
      of a single value.

  Returns:
    df: the same pandas `DataFrame`, with the added prediction column(s).

  Raises:
    ValueError: dataframe and prediction file do not correspond exactly
      In particular, they must have same number of lines and same order.
    ValueError: prediction file does not exist, is empty, or lacks the
      `example_key` or `prediction_name` field.
  """

  prediction_file = os.path.join(prediction_file,
                                 'prediction.results-00000-of-00001')
  if not tf.gfile.Exists(prediction_file):
    raise ValueError(
        'Prediction file does not exist.'
        ' You need to call prediction job and wait for completion.')

  def _load_predictions(pred_file):
    """Parses one JSON document per line; returns [] if parsing fails."""
    with file_io.FileIO(pred_file, 'r') as f:
      # prediction file needs to fit in memory.
      try:
        return [json.loads(line) for line in f]
      except ValueError as err:
        # Only parsing problems are tolerated here; they are reported below
        # through the "empty prediction file" error.
        logging.warning('Could not parse the prediction file {}: {}'.format(
            pred_file, err))
        return []

  predictions = _load_predictions(prediction_file)

  if not predictions:
    raise ValueError(
        'The prediction file returned by CMLE is empty.'
        ' It might be due to a badly formatted tfrecord input file that can not be'
        ' parsed by CMLE (wrong input signature given by a `Model` instance).'
        ' Check the logs of your CMLE job for further details.')
  if example_key not in predictions[0]:
    raise ValueError(
        "Predictions do not contain the 'example_key' field."
        " Verify that your 'example_key' parameter (set to {})"
        " matches the CMLE model signature.".format(example_key))
  if prediction_name not in predictions[0]:
    raise ValueError(
        "Predictions do not contain the 'prediction_name' field."
        " Verify that your 'prediction_name' parameter (set to {})"
        " matches the CMLE model signature.".format(prediction_name))
  if len(predictions) != len(df):
    raise ValueError('The dataframe and the prediction file do not contain'
                     ' the same number of lines.')

  # Values are assigned by position: once sorted by example key, predictions
  # are expected to be in the same order as the rows of df.
  predictions = sorted(predictions, key=lambda x: x[example_key])
  if class_names is None:
    df[model_col_name] = [x[prediction_name][0] for x in predictions]
  else:
    for i, class_name in enumerate(class_names):
      column_name = '{}_{}'.format(model_col_name, class_name)
      df[column_name] = [x[prediction_name][i] for x in predictions]

  return df


================================================
FILE: model_evaluation/utils_export/utils_cloudml_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf records utilities."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import unittest

import utils_cloudml


class CallModelPredictionsFromDf(unittest.TestCase):
  """Tests for `call_model_predictions_from_df`."""

  # TODO(fprost): Implement these.

  def test_correct(self):
    # Placeholder: report the test as skipped instead of letting it pass
    # vacuously, so the missing coverage stays visible in test reports.
    self.skipTest('Not implemented yet.')


class CheckJobOver(unittest.TestCase):
  """Tests for `check_job_over`."""

  # TODO(fprost): Implement these.
  def test_correct(self):
    # Placeholder: report the test as skipped instead of letting it pass
    # vacuously, so the missing coverage stays visible in test reports.
    self.skipTest('Not implemented yet.')


class AddModelPredictionsToDf(unittest.TestCase):
  """Tests for `add_model_predictions_to_df`.

  The prediction fixtures are referenced through `gs://` paths.
  NOTE(review): presumably these tests need read access to that bucket --
  confirm before running them in a sandboxed environment.
  """

  def setUp(self):
    self.COMMENT_KEY = 'comment_key'
    # Two-row frame whose key column matches the example keys 0 and 1.
    self._df = pd.DataFrame({
        self.COMMENT_KEY: [0, 1],
        'other_field_1': ['I am a man', 'I am a woman'],
        })
    self._prediction_file = 'gs://kaggle-model-experiments/files_for_unittest/model1:v1'
    self._model_col_name = 'model1:v1_preds'
    self._prediction_name = 'toxicity/logistic'
    self._example_key = self.COMMENT_KEY

  def test_missing_prediction_file(self):
    path = 'not_existing_folder/not_existing_file_path'

    with self.assertRaises(Exception) as context:
      utils_cloudml.add_model_predictions_to_df(
          self._df,
          path,
          self._model_col_name,
          self._prediction_name,
          self._example_key)
    # The message check must live outside the `with` block: statements placed
    # after the raising call inside the block are never executed.
    self.assertIn(
        'Prediction file does not exist.',
        str(context.exception))

  def test_empty_prediction_file(self):
    path = 'gs://kaggle-model-experiments/files_for_unittest/for_empty_predictions'

    with self.assertRaises(Exception) as context:
      utils_cloudml.add_model_predictions_to_df(
          self._df,
          path,
          self._model_col_name,
          self._prediction_name,
          self._example_key)
    self.assertIn(
        'The prediction file returned by CMLE is empty.',
        str(context.exception))

  def test_missing_example_key(self):
    example_key = 'not_found_example_key'
    with self.assertRaises(Exception) as context:
      utils_cloudml.add_model_predictions_to_df(
          self._df,
          self._prediction_file,
          self._model_col_name,
          self._prediction_name,
          example_key,
          )
    self.assertIn(
        "Predictions do not contain the 'example_key' field.",
        str(context.exception))

  def test_missing_prediction_key(self):
    prediction_key = 'not_found_prediction_key'
    with self.assertRaises(Exception) as context:
      utils_cloudml.add_model_predictions_to_df(
          self._df,
          self._prediction_file,
          self._model_col_name,
          prediction_key,
          self._example_key)
    self.assertIn(
        "Predictions do not contain the 'prediction_name' field.",
        str(context.exception))

  def test_correct(self):
    output_df = utils_cloudml.add_model_predictions_to_df(
        self._df,
        self._prediction_file,
        self._model_col_name,
        self._prediction_name,
        self._example_key)
    right_output = pd.DataFrame({
        self.COMMENT_KEY: [0, 1],
        'other_field_1': ['I am a man', 'I am a woman'],
        self._model_col_name: [0.38753455877304077, 0.045782867819070816]
        })
    # Sort the columns so the comparison does not depend on column order.
    pd.testing.assert_frame_equal(
        output_df.sort_index(axis=1), right_output.sort_index(axis=1))


# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()

================================================
FILE: model_evaluation/utils_export/utils_tfrecords.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines some utilities to use TF-Records with pandas DataFrame."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import pandas as pd
import random
import re

import tensorflow as tf
from tensorflow.python.lib.io import file_io
from tensorflow.python.platform import tf_logging as logging


def _bytes_feature(value):
  """Wraps a single value into a bytes-list `tf.train.Feature`."""
  bytes_list = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=bytes_list)


def _int64_feature(value):
  """Wraps a single value into an int64-list `tf.train.Feature`."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)


def _bytes_list_feature(value_list):
  """Wraps an iterable of values into one bytes-list `tf.train.Feature`.

  Every element is converted with `tf.compat.as_bytes` before being stored.
  """
  encoded_values = [tf.compat.as_bytes(item) for item in value_list]
  bytes_list = tf.train.BytesList(value=encoded_values)
  return tf.train.Feature(bytes_list=bytes_list)


class EncodingFeatureSpec(object):
  """Type names accepted in an encoding spec and their feature constructors."""

  # Type identifiers usable as values of a `{field_name: type}` spec.
  INTEGER = 'integer'
  STRING = 'string'
  LIST_STRING = 'list_string'

  # Maps each type identifier to the helper that builds the matching
  # `tf.train.Feature` from a raw value.
  CONSTRUCTOR_PER_TYPE = {
      INTEGER: _int64_feature,
      STRING: _bytes_feature,
      LIST_STRING: _bytes_list_feature
  }


def is_valid_spec(spec):
  """Verifies that `spec` is a well-formed encoding spec.

  Args:
    spec: Candidate spec. Expected to be a dict mapping field names (`str`)
      to one of the type names registered in
      `EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE`.

  Returns:
    None. A valid spec simply passes through without raising.

  Raises:
    ValueError: if `spec` is not a dict, if one of its keys is not a `str`,
      or if one of its values is not a registered type name.
  """
  if isinstance(spec, dict):
    entries = spec.items()
  else:
    raise ValueError('Spec should be a dictionary instance.')

  for field_name, type_name in entries:
    # The key is checked first, then the value, for every entry in turn.
    if not isinstance(field_name, str):
      raise ValueError(
          'Spec is badly defined. Keys should be string (field names).')
    authorized_types = EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE.keys()
    if type_name in authorized_types:
      continue
    raise ValueError(
        'Spec is badly defined. Authorized types are one of {}.'.format(
            authorized_types))


def encode_pandas_to_tfrecords(df,
                               feature_keys_spec,
                               tf_records_path,
                               example_key=None):
  """Write a pandas `DataFrame` to a tf_record.

  Args:
    df: pandas `DataFrame`. It must include the fields that are part of
      feature_key_spec.
    feature_keys_spec: Dict of {name: type}, which describes the spec of the
      TF-records.
    tf_records_path: where to write the tf records.
    example_key: key identifier of an example (string). This key will be added
      to data automatically and should not be part of df. Its value is the
      positional index of the row. If none, no example_key will be created.

  Raises:
    ValueError if feature_keys_spec does not follow a FeatureSpec format.

  Note: TFRecords will have fields feature_keys_spec and
  `example_key`(optional).
  """

  is_valid_spec(feature_keys_spec)

  # Resolve the constructor of each feature once rather than once per row.
  constructors = {
      feature: EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE[type_name]
      for feature, type_name in feature_keys_spec.items()
  }
  n_examples = len(df)

  writer = tf.python_io.TFRecordWriter(tf_records_path)
  try:
    for i in range(n_examples):

      if not i % 10000:
        logging.info('Preparing train data: {}/{}'.format(i, n_examples))

      # Create a feature
      feature_dict = {
          feature: constructor(df[feature].iloc[i])
          for feature, constructor in constructors.items()
      }
      # Added once per example, independently of how many features the spec
      # declares (including none).
      if example_key:
        feature_dict[example_key] = _int64_feature(i)
      example = tf.train.Example(
          features=tf.train.Features(feature=feature_dict))

      # Serialize to string and write on the file
      writer.write(example.SerializeToString())
  finally:
    # Release the file handle even when encoding a row fails.
    writer.close()


def decode_tf_records_to_pandas(decoding_features_spec,
                                tf_records_path,
                                max_n_examples=None,
                                random_filter_keep_rate=1.0,
                                filter_fn=None):
  """Loads tf-records into a pandas dataframe.

  Args:
    decoding_features_spec: A dict mapping feature keys to FixedLenFeature
      values. Spec of the tf-records.
    tf_records_path: path to the file
    max_n_examples: Maximum number of examples to extract. A falsy value
      (None or 0) means no limit.
    random_filter_keep_rate: Probability for each line to be kept in training
      data. For each line, we generate a random number x and keep it if x <
      random_filter_keep_rate.
    filter_fn (optional): Function applied to an example. If it returns False,
      the example will be discarded.

  Returns:
    A pandas `DataFrame` with one row per kept example. It is empty when no
    example could be read or kept.
  """

  if not max_n_examples:
    max_n_examples = float('inf')

  reader = tf.TFRecordReader()
  filenames = tf.train.match_filenames_once(tf_records_path)
  filename_queue = tf.train.string_input_producer(filenames,
                                                  num_epochs=1)

  _, serialized_example = reader.read(filename_queue)
  read_data = tf.parse_single_example(
      serialized=serialized_example, features=decoding_features_spec)

  sess = tf.InteractiveSession()
  rows = []
  try:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())
    tf.train.start_queue_runners(sess)

    while True:
      # Every read, including the first one, is guarded so that an input
      # without any record yields an empty DataFrame instead of an error.
      try:
        new_line = sess.run(read_data)
      except tf.errors.OutOfRangeError:
        logging.info('End of file.')
        break

      if filter_fn:
        keep_line = filter_fn(new_line)
      else:
        keep_line = True
      keep_line = keep_line and (random.random() < random_filter_keep_rate)
      if not keep_line:
        continue

      rows.append(new_line)
      count = len(rows)
      if count >= max_n_examples:
        break
      if not (count % 100000):
        logging.info('Loaded {} lines.'.format(count))
  finally:
    # Release the session resources on every exit path.
    sess.close()

  return pd.DataFrame(rows)


================================================
FILE: model_evaluation/utils_export/utils_tfrecords_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf records utilities."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest

import pandas as pd
import tensorflow as tf

import utils_tfrecords


class TestEncodingAndDecoding(unittest.TestCase):
  """Test to encode and decode a pandas DataFrame"""

  def testCorrect(self):
    # Imported locally: only this test needs them.
    import os
    import shutil
    import tempfile

    input_df = pd.DataFrame({
        'x': [1, 2, 3],
        'y': ['a', 'b', 'c'],
        'z': [['a', 'b'], ['c', 'd'], ['e', 'f']]
    })
    encoding_feature_spec = {
        'x': utils_tfrecords.EncodingFeatureSpec.INTEGER,
        'y': utils_tfrecords.EncodingFeatureSpec.STRING,
        'z': utils_tfrecords.EncodingFeatureSpec.LIST_STRING
    }
    decoding_spec = {
        'x': tf.FixedLenFeature([], dtype=tf.int64),
        'y': tf.FixedLenFeature([], dtype=tf.string),
        'z': tf.FixedLenFeature([2], dtype=tf.string),
    }

    # Write the records into a throw-away directory so the test leaves no
    # artifact behind in the working directory.
    tmp_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
    tf_records_path = os.path.join(tmp_dir, 'unittest.tf_records')

    utils_tfrecords.encode_pandas_to_tfrecords(input_df, encoding_feature_spec,
                                               tf_records_path)

    output_df = utils_tfrecords.decode_tf_records_to_pandas(
        decoding_spec, tf_records_path)
    try:
      pd.testing.assert_frame_equal(input_df, output_df)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')


class TestFeatureKeySpec(unittest.TestCase):
  """Verifies the format of Feature Spec"""

  def test_not_a_dictionary(self):
    # A plain string (not a one-element tuple) is the non-dict input under
    # test.
    feature_keys_spec = 'not_a_dict'
    with self.assertRaises(Exception) as context:
      utils_tfrecords.is_valid_spec(feature_keys_spec)
    self.assertIn('Spec should be a dictionary instance.',
                  str(context.exception))

  def test_not_in_possible(self):
    feature_keys_spec = {'key': 'other_possibility'}
    with self.assertRaises(Exception) as context:
      utils_tfrecords.is_valid_spec(feature_keys_spec)
    self.assertIn('Spec is badly defined. Authorized types are one of',
                  str(context.exception))

  def test_valid(self):
    # A spec using a registered type must be accepted without raising.
    try:
      feature_keys_spec = {
          'comment_text': utils_tfrecords.EncodingFeatureSpec.LIST_STRING
      }
      utils_tfrecords.is_valid_spec(feature_keys_spec)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')


# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()


================================================
FILE: travis_blase_test_support/bazel_0.18.1-linux-x86_64.deb.sha256
================================================
4c2cd0a71ab1b65753aeb757af36bd6ebde9da4e53183525a1e1849c2542fdda  bazel_0.18.1-linux-x86_64.deb