Repository: conversationai/conversationai-models Branch: main Commit: d3a724c96e24 Files: 196 Total size: 953.6 KB Directory structure: gitextract__2536wl_/ ├── .bazelrc ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── annotator_models/ │ ├── README.md │ ├── bin/ │ │ ├── cancel-job │ │ ├── ls-jobs │ │ ├── run │ │ ├── run_local │ │ └── stream-logs │ ├── cpu_config.yaml │ ├── requirements.txt │ ├── results/ │ │ └── .gitignore │ └── trainer/ │ ├── __init__.py │ ├── dawid_skene.py │ └── dawid_skene_test.py ├── attention-tutorial/ │ ├── Attention_Model_Tutorial.ipynb │ ├── README.md │ ├── checkpoints/ │ │ └── README.md │ ├── data/ │ │ └── README.md │ ├── process_figshare.py │ ├── requirements.txt │ └── visualize_attention.py ├── data_preparation/ │ ├── README.md │ ├── config.ini │ ├── preprocessing/ │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── preprocessing.py │ │ └── tfrecord_utils.py │ ├── requirements.txt │ ├── run_preprocessing_artificial_bias.py │ ├── run_preprocessing_data_split.py │ └── setup.py ├── experiments/ │ ├── .gitignore │ ├── README.md │ ├── WORKSPACE │ ├── __init__.py │ ├── requirements.txt │ ├── setup.py │ ├── testdata/ │ │ ├── BUILD │ │ ├── cats_and_dogs.jsonl │ │ ├── cats_and_dogs_onehot.vocab.txt │ │ ├── cats_and_dogs_with_cat_opt_int_labels.jsonl │ │ └── cats_and_dogs_with_partial_cat_int_labels.jsonl │ ├── tf_trainer/ │ │ ├── __init__.py │ │ ├── common/ │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ ├── base_model.py │ │ │ ├── basic_gpu_config.yaml │ │ │ ├── cnn_spec_parser.py │ │ │ ├── cnn_spec_parser_test.py │ │ │ ├── dataset_config.sh │ │ │ ├── dataset_input.py │ │ │ ├── episodic_tfrecord_input.py │ │ │ ├── episodic_tfrecord_input_test.py │ │ │ ├── model_trainer.py │ │ │ ├── p100_config.yaml │ │ │ ├── serving_input.py │ │ │ ├── text_preprocessor.py │ │ │ ├── text_preprocessor_test.py │ │ │ ├── tfrecord_input.py │ │ │ ├── tfrecord_input_test.py │ │ │ ├── token_embedding_index.py │ │ │ ├── 
token_embedding_index_test.py │ │ │ ├── types.py │ │ │ └── v100_config.yaml │ │ ├── tf_char_cnn/ │ │ │ ├── __init__.py │ │ │ ├── hparam_config.yaml │ │ │ ├── hparam_config_civil_comments.yaml │ │ │ ├── hparam_config_many_communities.yaml │ │ │ ├── hparam_config_toxicity.yaml │ │ │ ├── model.py │ │ │ ├── run.deploy.sh │ │ │ ├── run.hyperparameter.sh │ │ │ ├── run.local.sh │ │ │ ├── run.ml_engine.sh │ │ │ └── run.py │ │ ├── tf_cnn/ │ │ │ ├── __init__.py │ │ │ ├── finetune.py │ │ │ ├── finetune.sh │ │ │ ├── hparam_config.yaml │ │ │ ├── hparam_config_civil_comments.yaml │ │ │ ├── hparam_config_many_communities.yaml │ │ │ ├── hparam_config_many_communities_40_per_8_shot.yaml │ │ │ ├── hparam_config_toxicity.yaml │ │ │ ├── model.py │ │ │ ├── run.deploy.sh │ │ │ ├── run.hyperparameter.sh │ │ │ ├── run.local.sh │ │ │ ├── run.ml_engine.sh │ │ │ └── run.py │ │ ├── tf_gru_attention/ │ │ │ ├── __init__.py │ │ │ ├── finetune.py │ │ │ ├── finetune.sh │ │ │ ├── hparam_config.yaml │ │ │ ├── hparam_config_civil_comments.yaml │ │ │ ├── hparam_config_many_communities.yaml │ │ │ ├── hparam_config_many_communities_40_per_8_shot.yaml │ │ │ ├── hparam_config_toxicity.yaml │ │ │ ├── model.py │ │ │ ├── run.deploy.sh │ │ │ ├── run.hyperparameter.sh │ │ │ ├── run.local.sh │ │ │ ├── run.ml_engine.sh │ │ │ └── run.py │ │ ├── tf_hub_classifier/ │ │ │ ├── __init__.py │ │ │ ├── finetune.py │ │ │ ├── finetune.sh │ │ │ ├── hparam_config.yaml │ │ │ ├── hparam_config_civil_comments.yaml │ │ │ ├── hparam_config_many_communities.yaml │ │ │ ├── hparam_config_many_communities_40_per_8_shot.yaml │ │ │ ├── hparam_config_toxicity.yaml │ │ │ ├── model.py │ │ │ ├── run.deploy.sh │ │ │ ├── run.hyperparameter.sh │ │ │ ├── run.local.sh │ │ │ ├── run.ml_engine.sh │ │ │ └── run.py │ │ ├── tf_hub_tfjs/ │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ ├── notebook/ │ │ │ │ ├── BiasEvaluation.ipynb │ │ │ │ └── EvaluatingClassifier.ipynb │ │ │ ├── run.local.sh │ │ │ └── run.py │ │ ├── tf_kona_prototypical_network/ │ │ │ 
└── proto.py │ │ └── tf_word_label_embedding/ │ │ ├── __init__.py │ │ ├── hparam_config.yaml │ │ ├── model.py │ │ ├── run.hyperparameter.sh │ │ ├── run.local.sh │ │ ├── run.ml_engine.sh │ │ └── run.py │ └── tools/ │ ├── bert_tfrecord_converter.py │ ├── convert_csv_to_tfrecord.py │ └── convert_jsonl_to_tfrecord.py ├── hierarchical_attention_research/ │ └── han_model/ │ ├── .gitignore │ ├── HAN_model.py │ ├── LICENSE │ ├── README.md │ ├── bn_lstm.py │ ├── bn_lstm_test.py │ ├── data_util.py │ ├── model_components.py │ ├── requirements.txt │ ├── worker.py │ ├── yelp.py │ └── yelp_prepare.py ├── kaggle-classification/ │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── bin/ │ │ ├── cancel-job │ │ ├── ls-jobs │ │ ├── run │ │ ├── run_keras.sh │ │ ├── run_keras_local.sh │ │ ├── run_local │ │ └── stream-logs │ ├── config.yaml │ ├── gpu_config.yaml │ ├── hparam_config.yaml │ ├── keras_hparam_config.yaml │ ├── keras_trainer/ │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── cnn_with_attention.py │ │ ├── custom_metrics.py │ │ ├── model.py │ │ ├── rnn.py │ │ └── single_layer_cnn.py │ ├── requirements.txt │ ├── setup.py │ └── trainer/ │ ├── __init__.py │ ├── model.py │ └── wikidata.py ├── model_evaluation/ │ ├── BiosBias Evaluation.ipynb │ ├── Predict bias.ipynb │ ├── README.md │ ├── deploy_models.sh │ ├── few_shot_learning_baseline_evaluation.ipynb │ ├── input_fn_example.py │ ├── jigsaw_evaluation_pipeline.ipynb │ ├── requirements.txt │ ├── score_bias_data.sh │ ├── score_scrubbed_data.sh │ ├── score_test_data.py │ └── utils_export/ │ ├── __init__.py │ ├── dataset.py │ ├── dataset_test.py │ ├── deploy_list_models.py │ ├── utils_cloudml.py │ ├── utils_cloudml_test.py │ ├── utils_tfrecords.py │ └── utils_tfrecords_test.py └── travis_blase_test_support/ └── bazel_0.18.1-linux-x86_64.deb.sha256 ================================================ FILE CONTENTS ================================================ ================================================ FILE: .bazelrc 
================================================ startup --host_jvm_args=-Xmx2500m startup --host_jvm_args=-Xms2500m startup --batch test --ram_utilization_factor=10 build --verbose_failures build --spawn_strategy=standalone --genrule_strategy=standalone test --test_strategy=standalone ================================================ FILE: .gitignore ================================================ # Editor config. .vscode/ # Python Compiles files. *.pyc # Virtual Environment files. .pyenv .virtualenv env .venv # mypy cache files for type-checking. .mypy_cache # Bazel bazel-bin bazel-experiments bazel-genfiles bazel-out bazel-testlogs ================================================ FILE: .travis.yml ================================================ language: python python: - "3.5" - "3.6" dist: trusty addons: apt: sources: - ubuntu-toolchain-r-test packages: - wget - pkg-config before_install: - wget https://github.com/bazelbuild/bazel/releases/download/0.18.1/bazel_0.18.1-linux-x86_64.deb - sha256sum -c travis_blase_test_support/bazel_0.18.1-linux-x86_64.deb.sha256 - sudo dpkg -i bazel_0.18.1-linux-x86_64.deb - cd experiments install: - pip install -r requirements.txt script: - bazel test --test_output=streamed ... ================================================ FILE: CONTRIBUTING.md ================================================ # How to contribute We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the copyright to your contribution, this simply gives us permission to use and redistribute your contributions as part of the project. Head over to to see your current agreements on file or to sign a new one. 
You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again. ## Code reviews All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult [GitHub Help] for more information on using pull requests. [GitHub Help]: https://help.github.com/articles/about-pull-requests/ ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # ConversationAI Models This repository contains example code to train machine learning models for text classification as part of the [Conversation AI](https://conversationai.github.io/) project. # Outline of the codebase * `experiments/` contains the ML training framework. * `annotator_models/` contains a Dawid-Skene implementation for modelling rater quality to produce better annotations. * `attention-tutorial/` contains an introductory ipython notebook for RNNs with attention, as presented in the Devoxx talk ["Tensorflow, deep learning and modern RNN architectures, without a PhD by Martin Gorner"](https://www.youtube.com/watch?v=pzOzmxCR37I) * `kaggle-classification/` contains early experiments with Keras and Estimator for training on [the Jigsaw Toxicity Kaggle competition](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge). Will be superseded by `experiments/` shortly. * `model_evaluation/` contains utilities to use a model deployed on cloud MLE, and some notebooks to illustrate typical evaluation metrics. ## About this code This repository contains example code to help experiment with models to improve conversations; it is not an official Google product. 
================================================ FILE: annotator_models/README.md ================================================ # Modeling Annotators This is an implementation of the [Dawid-Skene model](http://crowdsourcing-class.org/readings/downloads/ml/EM.pdf). Dawid-Skene is an unsupervised model that can be used to improve the quality of a crowdsourced dataset by learning annotator error rates and predicting the true item labels. This code was adapted from an [implementation](https://github.com/dallascard/dawid_skene) by [dallascard](https://github.com/dallascard). ## To Run Locally 1. Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for the project (recommended, but technically optional). Python 2: ``` python -m virtualenv env ``` Python 3: ``` python3 -m venv env ``` With either version, enter your virtual env by running: ```shell source env/bin/activate ``` 2. Install library dependencies: ```shell pip install -r requirements.txt ``` 3. Create training data. The training data must be a CSV that has fields for the worker ID, item ID and label. You can specify the column names for these fields as flags to the training script. For example: ``` comment_id,worker_id,toxic 1519346288,43675129,0 1519346288,41122119,0 1519346288,38510102,0 1519346288,43650017,0 1519346288,28524232,0 ... ``` 4. Run a model on a given class (e.g. 'toxic' or 'obscene'). There are examples of how to run the model locally and using ml-engine in [`bin/run_local`](bin/run_local) and [`bin/run`](bin/run) respectively. Note: to run in Google Cloud, you will need to be authenticated with Google Cloud (you can run `gcloud auth application-default login` to do this) and you must have access to the cloud bucket where the data is located (you can test this by running `gcloud storage ls gs://kaggle-model-experiments/`). 5. The output is two files written to the `job-dir` directory specified in the run script. 
* `error_rates_{LABEL}_{N_ANNOTATIONS}.csv` - the error rates for each annotator * `predictions_{LABEL}_{N_ANNOTATIONS}.csv` - the predicted labels for each item ================================================ FILE: annotator_models/bin/cancel-job ================================================ #!/bin/bash gcloud ml-engine jobs cancel $1 ================================================ FILE: annotator_models/bin/ls-jobs ================================================ #!/bin/bash gcloud ml-engine jobs list | grep $USER ================================================ FILE: annotator_models/bin/run ================================================ #!/bin/bash # # A script to train the kaggle model remotely using ml-engine. # # To run with default hyperparameters from the kaggle-classification directory just enter: # './bin/run' # # # # Setup Steps: # 1. Install the gcloud SDK # 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]` # 3. Put the train and test data in Cloud Storage, `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/` # # Edit these! BUCKET_NAME=annotator_models CONFIG=cpu_config.yaml MAX_ITER=50 TOLERANCE=1 PSEUDO_COUNT=1 declare -a LABELS=("obscene" "sexual_explicit" "threat" "flirtation" "identity_hate" "insult") # Note: this must be compatible with cells that have GPUs. us-central1 works. # See: https://cloud.google.com/ml-engine/docs/using-gpus REGION=us-central1 while getopts :c:h opt; do case ${opt} in h) echo "Usage: run [-c config_filename.yaml]" echo "Flags: " echo -e " -c Specify a config file (e.g. use hparam_config to enable hyperparameter tuning)" exit 0;; c) echo "Using custom config ${OPTARG}" CONFIG=${OPTARG};; :) echo "Error: ${OPTARG} requires an argument." echo "Use 'run -h' for help." exit 1;; \?) echo "Invalid flag. Use 'run -h' for help." 
#!/bin/bash
# A script to train the kaggle model locally.
#
# For every label in the array below this invokes the Dawid-Skene trainer
# (trainer.dawid_skene) through `gcloud ml-engine local train`, reading that
# label's annotation CSV from the bucket and passing 'results' as the job dir.

DATE=`date '+%Y%m%d_%H%M%S'`
BUCKET_NAME=annotator_models

declare -a arr=("obscene" "threat" "insult" "identity_hate" "toxic" "severe_toxic")

## now loop through the above array
for label in "${arr[@]}"
do
  # This has to be a standalone assignment (no trailing backslash). If it is
  # joined to the gcloud command it only becomes an environment prefix of that
  # command, and ${data_path} in the arguments below is expanded by the shell
  # *before* the prefix assignment takes effect, i.e. to an empty string.
  data_path="gs://${BUCKET_NAME}/kaggle_annotation_data/dawid_skene_annotations_on_kaggle_test_${label}.csv"

  gcloud ml-engine local train \
    --module-name=trainer.dawid_skene \
    --package-path=trainer -- \
    --data-path="${data_path}" \
    --comment-text-path="${data_path}" \
    --label="${label}" \
    --job-dir='results' \
    --worker-id-col='annotator_id' \
    --unit-id-col='comment_id' \
    --tolerance=50 \
    --n_examples=1000
done
# scaleTier: CUSTOM masterType: large_model # workerType: complex_model_m_gpu # parameterServerType: large_model # workerCount: 9 # parameterServerCount: 3 ================================================ FILE: annotator_models/requirements.txt ================================================ absl-py==0.1.12 astor==0.6.2 backports.weakref==1.0.post1 bleach==3.3.0 cachetools==2.0.1 certifi==2024.7.4 chardet==3.0.4 dill==0.2.7.1 enum34==1.1.6 funcsigs==1.0.2 future==0.18.3 futures==3.2.0 gapic-google-cloud-datastore-v1==0.15.3 gapic-google-cloud-error-reporting-v1beta1==0.15.3 gapic-google-cloud-logging-v2==0.91.3 gast==0.2.0 google-api-core==1.1.0 google-auth==1.4.1 google-auth-oauthlib==0.2.0 google-cloud==0.32.0 google-cloud-bigquery==0.31.0 google-cloud-bigquery-datatransfer==0.1.1 google-cloud-bigtable==0.28.1 google-cloud-container==0.1.1 google-cloud-core==0.28.1 google-cloud-datastore==1.4.0 google-cloud-dns==0.28.0 google-cloud-error-reporting==0.28.0 google-cloud-firestore==0.28.0 google-cloud-language==1.0.1 google-cloud-logging==1.4.0 google-cloud-monitoring==0.28.1 google-cloud-pubsub==0.30.1 google-cloud-resource-manager==0.28.1 google-cloud-runtimeconfig==0.28.1 google-cloud-spanner==0.29.0 google-cloud-speech==0.30.0 google-cloud-storage==1.6.0 google-cloud-trace==0.17.0 google-cloud-translate==1.3.1 google-cloud-videointelligence==1.0.1 google-cloud-vision==0.29.0 google-gax==0.15.16 google-resumable-media==0.3.1 googleapis-common-protos==1.5.3 grpc-google-iam-v1==0.11.4 grpcio==1.53.2 html5lib==0.999999999 httplib2==0.19.0 idna==3.7 Markdown==2.6.11 mock==2.0.0 numpy==1.22.0 oauth2client==3.0.0 oauthlib==2.0.7 pandas==0.22.0 pandas-gbq==0.3.1 pbr==4.0.0 ply==3.8 proto-google-cloud-datastore-v1==0.90.4 proto-google-cloud-error-reporting-v1beta1==0.15.3 proto-google-cloud-logging-v2==0.91.3 protobuf==3.18.3 psutil==5.6.6 pyasn1==0.4.2 pyasn1-modules==0.2.1 python-dateutil==2.7.2 pytz==2018.3 requests==2.32.0 requests-oauthlib==0.8.0 rsa==4.7 
six==1.11.0 tensorboard==1.12.0 tensorflow==2.12.1 termcolor==1.1.0 urllib3==1.26.18 Werkzeug==3.0.3 ================================================ FILE: annotator_models/results/.gitignore ================================================ * !.gitignore ================================================ FILE: annotator_models/trainer/__init__.py ================================================ ================================================ FILE: annotator_models/trainer/dawid_skene.py ================================================ """Description: Given unreliable ratings of items classes by multiple raters, determine the most likely true class for each item, class marginals, and individual error rates for each rater, using Expectation Maximization References: ( Dawid and Skene (1979). Maximum Likelihood Estimation of Observer Error-Rates Using the EM Algorithm. Journal of the Royal Statistical Society. Series C (Applied Statistics), Vol. 28, No. 1, pp. 20-28. """ import argparse import logging import math import sys import time import numpy as np import pandas as pd from scipy import stats import tensorflow as tf FLAGS = None np.set_printoptions(precision=2) def run(items, raters, classes, counts, label, psuedo_count, tol=1, max_iter=25, init='average'): """ Run the Dawid-Skene estimator on response data Input: responses: a pandas DataFrame of ratings where each row is a rating from some rater ('_worker_id') on some item ('_unit_id') tol: tolerance required for convergence of EM max_iter: maximum number of iterations of EM """ # initialize iteration = 0 converged = False old_class_marginals = None old_error_rates = None # item_classes is a matrix of estimates of true item classes of size # [items, classes] item_classes = initialize(counts) [nItems, nRaters, nClasses] = np.shape(counts) logging.info('Iter\tlog-likelihood\tdelta-CM\tdelta-Y_hat') while not converged: iteration += 1 start_iter = time.time() # M-step - updated error rates and class marginals given 
def initialize(counts):
  """
  Get initial estimates for the true item classes using counts
  see equation 3.1 in Dawid-Skene (1979)

  Each item's estimate is the fraction of its ratings that fall in each
  class. An item that received no ratings at all carries no information, so
  it is given a uniform distribution over the classes instead of the NaN row
  that a 0/0 division would produce (a single NaN row would otherwise
  propagate into the class marginals computed by the M-step).

  Input:
    counts: counts of the number of times each response was given
      by each rater for each item: [items x raters x classes]. Note
      in the crowd rating example, counts will be a 0/1 matrix.

  Returns:
    item_classes: matrix of estimates of true item classes:
      [items x classes]; every row sums to 1 (when there is at least
      one class).
  """
  # Unpacking also enforces that counts is 3-dimensional.
  [nItems, nRaters, nClasses] = np.shape(counts)

  # sum over raters -> [items x classes], as floats so the division below is
  # a true division whatever dtype counts has
  response_sums = np.sum(counts, axis=1, dtype=float)

  # total number of ratings each item received -> [items x 1]
  totals = np.sum(response_sums, axis=1, keepdims=True)
  unrated = totals == 0

  # Replace zero totals by 1 before dividing so no 0/0 is ever evaluated;
  # those rows are overwritten with the uniform fallback by np.where.
  safe_totals = np.where(unrated, 1.0, totals)
  uniform = 1.0 / nClasses if nClasses else 0.0
  item_classes = np.where(unrated, uniform, response_sums / safe_totals)

  return item_classes
sum_over_responses[sum_over_responses == 0] = 1 error_rates = np.divide(error_rates, sum_over_responses) return (class_marginals, error_rates) def m_step_verbose(counts, item_classes, psuedo_count): """ This method is the verbose (i.e. not vectorized) version of the m_step. It is currently not used because the vectorized version is faster, but we leave it here for future debugging. Get estimates for the prior class probabilities (p_j) and the error rates (pi_jkl) using MLE with current estimates of true item classes See equations 2.3 and 2.4 in Dawid-Skene (1979) Input: counts: Array of how many times each rating was given by each rater for each item item_classes: Matrix of current assignments of items to classes psuedo_count: A psuedo count used to smooth the error rates. For each rater k and for each class i and class j, we pretend rater k has rated psuedo_count examples with class i when class j was the true class. Returns: p_j: class marginals [classes] pi_kjl: error rates - the probability of rater k giving response l for an item in class j [observers, classes, classes] """ [nItems, nRaters, nClasses] = np.shape(counts) # compute class marginals class_marginals = np.sum(item_classes, 0) / float(nItems) # compute error rates for each rater, each predicted class # and each true class error_rates = np.zeros([nRaters, nClasses, nClasses]) for k in range(nRaters): for j in range(nClasses): for l in range(nClasses): error_rates[k, j, l] = np.dot(item_classes[:,j], counts[:,k,l]) \ + psuedo_count # normalize by summing over all observation classes sum_over_responses = np.sum(error_rates[k, j, :]) if sum_over_responses > 0: error_rates[k, j, :] = error_rates[k, j, :] / float(sum_over_responses) return (class_marginals, error_rates) def e_step(counts_tiled, class_marginals, error_rates): """ Determine the probability of each item belonging to each class, given current ML estimates of the parameters from the M-step See equation 2.5 in Dawid-Skene (1979) Inputs: 
def e_step_verbose(counts, class_marginals, error_rates):
    """
    Loop-based (non-vectorized) expectation step.

    For every item, computes the probability of the item belonging to each
    class given the current class marginals and error rates from the M-step.

    See equation 2.5 in Dawid-Skene (1979)

    Inputs:
      counts: Array of how many times each rating was given
          by each rater for each item: [items x raters x classes]
      class_marginals: probability of a random item belonging to each class
      error_rates: probability of rater k assigning a item in class j
          to class l [raters, classes, classes]
    Returns:
      item_classes: Soft assignments of items to classes
          [items x classes]
    """
    n_items, _, n_classes = np.shape(counts)
    item_classes = np.zeros([n_items, n_classes])

    for item in range(n_items):
        ratings = counts[item, :, :]
        for true_class in range(n_classes):
            # likelihood of this item's ratings if true_class were correct:
            # product over raters and responses of error_rate ** count
            likelihood = np.prod(
                np.power(error_rates[:, true_class, :], ratings))
            item_classes[item, true_class] = (
                class_marginals[true_class] * likelihood)

    # rescale every row by its total (broadcast across the class axis)
    row_totals = np.sum(item_classes, axis=1, keepdims=True)
    return item_classes / row_totals
def majority_voting(counts):
    """
    Hard initial assignment of items to classes by majority vote: each item
    is assigned entirely to the class that received the most ratings.

    Input:
      counts: Counts of the number of times each response was received
          by each rater from each item: [items x raters x classes]
    Returns:
      item_classes: matrix of initial estimates of true item classes:
          [items x responses], a single 1 per row and 0 elsewhere
    """
    n_items, _, n_classes = np.shape(counts)

    # total votes each class received for each item, over all raters
    votes = np.sum(counts, axis=1)

    item_classes = np.zeros([n_items, n_classes])
    for row, item_votes in enumerate(votes):
        winners = np.flatnonzero(item_votes == item_votes.max())
        # ties go to the lowest valued label (could be randomized)
        item_classes[row, winners.min()] = 1

    return item_classes
0,1,...nItems index * _unit_id: the original item ID * {LABEL}_hat: the predicted probability of the item being labeled 1 as learned from the Dawid-Skene algorithm * {LABEL}_mean: the mean of the original ratings """ LABEL_HAT = '{}_hat'.format(label) LABEL_MEAN = '{}_mean'.format(label) ROUND_DEC = 8 _, N_ClASSES = np.shape(item_classes) df_predictions = pd.DataFrame() # Add columns for predictions for each class col_names = [] for k in range(N_ClASSES): # y is the original value of the class. When we train, we re-map # all the classes to 0,1,....K. But our data has classes like # -2,-1,0,1,2. In that case, of k is 0, then y would be -2 y = index_to_y_map[k] col_name = '{0}_{1}'.format(LABEL_HAT, y) col_names.append(col_name) df_predictions[col_name] = [round(i[k], ROUND_DEC) for i in item_classes] # To get a prediction of the mean label, multiply our predictions with the # true y values. y_values = list(index_to_y_map.values()) col_name = '{0}_hat_mean'.format(label) df_predictions[col_name] = np.dot(df_predictions[col_names], list(y_values)) # Use the _unit_index to map to the original _unit_id df_predictions['_unit_index'] = range(len(item_classes)) df_predictions[unit_id] = df_predictions['_unit_index']\ .apply(lambda i: index_to_unit_id_map[i]) # Calculate the y_mean from the original data and join on _unit_id # Add a column for the mean predictions df[label] = df[label].astype(float) mean_labels = df.groupby(unit_id, as_index=False)[label]\ .mean()\ .round(ROUND_DEC)\ .rename(index=int, columns={label: LABEL_MEAN}) df_predictions = pd.merge(mean_labels, df_predictions, on=unit_id) # join with data that contains the item-level comment text if comment_text_path: with tf.gfile.Open(comment_text_path, 'r') as fileobj: logging.info( 'Loading comment text data from {}'.format(comment_text_path)) df_comments = pd.read_csv(fileobj) # drop duplicate comments df_comments = df_comments.drop_duplicates(subset=unit_id) df_predictions = df_predictions.merge(df_comments, 
def parse_error_rates(df, error_rates, index_to_worker_id_map, index_to_y_map,
                      unit_id, worker_id):
    """
    Build a per-worker summary table from the learned error rates.

    Returns a DataFrame with one row per worker and the fields:
      * _worker_index: the integer index the worker was mapped to
      * {worker_id}: the original worker ID
      * n_annotations: the number of non-null unit_id entries in df for
        the worker
      * accuracy_rate_{y}: for every original label y, the probability the
        worker chooses y when the true class is y, i.e. the diagonal of the
        worker's error-rate matrix (for accurate workers, these numbers
        are high).

    Workers listed in index_to_worker_id_map that have no rows in df are
    dropped, because the annotation counts are joined with an inner merge.

    Input:
      df: the original annotation data, one row per annotation
      error_rates: per-worker error rates, indexed as
          error_rates[worker_index][true class index, response index]
      index_to_worker_id_map: dict of worker index -> original worker ID
      index_to_y_map: dict of class index -> original label value
      unit_id: name of the item ID column in df
      worker_id: name of the worker ID column in df
    Returns:
      df_error_rates: the DataFrame described above
    """
    # Materialize the dict view: pandas does not accept dict views as column
    # values in every version.
    worker_indexes = list(index_to_worker_id_map.keys())
    df_error_rates = pd.DataFrame(
        {
            '_worker_index': worker_indexes,
            worker_id: [index_to_worker_id_map[i] for i in worker_indexes],
        },
        columns=['_worker_index', worker_id])

    # add annotation counts for each worker
    worker_counts = (
        df.groupby(by=worker_id, as_index=False)[unit_id]
        .count()
        .rename(columns={unit_id: 'n_annotations'}))
    df_error_rates = pd.merge(df_error_rates, worker_counts, on=worker_id)

    # Add the diagonal error rates, which are the per-class accuracy rates.
    # Rates are looked up through _worker_index instead of by row position,
    # so the columns stay aligned even when the merge above dropped workers.
    merged_indexes = list(df_error_rates['_worker_index'])
    for y_index, y_label in index_to_y_map.items():
        col_name = 'accuracy_rate_{0}'.format(y_label)
        df_error_rates[col_name] = [
            error_rates[k][y_index, y_index] for k in merged_indexes
        ]

    return df_error_rates
def build_arg_parser():
    """
    Build the command-line parser for the Dawid-Skene trainer.

    Returns:
      argparse.ArgumentParser whose parsed namespace carries the attributes
      read by main(): data_path, comment_text_path, worker_id_col,
      unit_id_col, n_examples, label, job_dir, max_iter, pseudo_count and
      tolerance.
    """
    parser = argparse.ArgumentParser()
    # Required: main() cannot load anything without a data path.
    parser.add_argument(
        '--data-path',
        required=True,
        help='The path to data to run on, local or in Cloud Storage.')
    parser.add_argument(
        '--comment-text-path',
        help='The path to comment text, local or in Cloud Storage.')
    parser.add_argument(
        '--worker-id-col',
        help='Column name of worker id.',
        default='_worker_id')
    parser.add_argument(
        '--unit-id-col',
        help='Column name of unit id.',
        default='_comment_id')
    # '--n_examples' is the original spelling and keeps working; the dashed
    # form matches the other flags.
    parser.add_argument(
        '--n-examples',
        '--n_examples',
        dest='n_examples',
        help='The number of annotations to use.',
        default=10000000,
        type=int)
    parser.add_argument(
        '--label',
        help='The label to train on, e.g. "obscene" or "threat"',
        default='obscene')
    parser.add_argument(
        '--job-dir',
        type=str,
        default='',
        help='The directory where the job is staged.')
    parser.add_argument(
        '--max-iter',
        help='The max number of iteration to run.',
        type=int,
        default=25)
    parser.add_argument(
        '--pseudo-count',
        help='The pseudo count to smooth error rates.',
        type=float,
        default=1.0)
    # Parsed as float so that fractional tolerances can be given; it is
    # compared against summed absolute differences of probabilities.
    parser.add_argument(
        '--tolerance',
        help='Stop training when variables change less than this value.',
        type=float,
        default=1.0)
    return parser


if __name__ == '__main__':
    FLAGS = build_arg_parser().parse_args()
    print('FLAGS', FLAGS)
    main(FLAGS)
def setUp(self): self.table_1 = pd.DataFrame.from_dict({ 'patient': range(1, 46), 11: [ 1, 3, 1, 2, 2, 2, 1, 3, 2, 2, 4, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 2, 4, 2, 2, 3, 1, 1, 1, 2, 1, 2 ], 12: [ 1, 3, 1, 2, 2, 2, 2, 3, 2, 3, 4, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 3, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2, 3, 3, 1, 1, 2, 3, 2, 2 ], 13: [ 1, 3, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 1, 3, 1, 2, 2, 3, 1, 2, 3, 1, 1, 1, 2, 1, 2 ], 2: [ 1, 4, 2, 3, 3, 3, 2, 3, 2, 2, 4, 3, 1, 3, 1, 2, 1, 1, 2, 1, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 3, 4, 2, 3, 3, 1, 1, 2, 2, 1, 2 ], 3: [ 1, 3, 1, 1, 2, 3, 1, 4, 2, 2, 4, 3, 1, 2, 1, 1, 1, 1, 2, 3, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 2, 2, 4, 1, 1, 1, 2, 1, 2 ], 4: [ 1, 3, 2, 2, 2, 2, 1, 3, 2, 2, 4, 4, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 3, 1, 2, 3, 4, 3, 3, 3, 1, 1, 1, 2, 1, 2 ], 5: [ 1, 4, 2, 1, 2, 2, 1, 3, 3, 3, 4, 3, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2, 3, 2, 1, 1, 1, 2, 1, 2 ] }) def test_paper_example(self): with tempfile.TemporaryDirectory() as tempdirname: f = tempfile.NamedTemporaryFile(delete=False) f.file.close() data = self.table_1.set_index('patient').stack().rename_axis(['patient', 'observer']).to_frame('label').reset_index() data['observer'] = data['observer'].map({11:1, 12:1, 13:1, 2:2, 3:3, 4:4, 5:5}) data.to_csv(f.name, header=True) Flags = collections.namedtuple('Flags', 'n_examples label unit_id_col worker_id_col comment_text_path data_path pseudo_count tolerance max_iter job_dir') Flags.data_path = f.name Flags.label = 'label' Flags.worker_id_col = 'observer' Flags.unit_id_col = 'patient' Flags.n_examples = 350 Flags.pseudo_count = 1.0 Flags.comment_text_path = None Flags.max_iter = 25 Flags.tolerance = 1 Flags.job_dir = tempdirname dawid_skene.main(Flags) os.unlink(f.name) predictions = pd.read_csv(os.path.join(tempdirname, 'predictions_label_315.csv')) 
print(predictions) error_rates = pd.read_csv(os.path.join(tempdirname, 'error_rates_label_315.csv')) print(error_rates) if __name__ == '__main__': unittest.main() ================================================ FILE: attention-tutorial/Attention_Model_Tutorial.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "szO16q_1vXOT" }, "source": [ "# Attention Based Classification Tutorial\n", "\n", "**Recommended time: 30 minutes**\n", "\n", "**Contributors: nthain, martin-gorner**\n", "\n", "\n", "This tutorial provides an introduction to building text classification models in tensorflow that use attention to provide insight into how classification decisions are being made. We will build our tensorflow graph following the Embed - Encode - Attend - Predict paradigm introduced by Matthew Honnibal. For more information about this approach, you can refer to:\n", "\n", "Slides: https://goo.gl/BYT7au\n", "\n", "Video: https://youtu.be/pzOzmxCR37I\n", "\n", "\n", "Figure 1 below provides a representation of the full tensorflow graph we will build in this tutorial. The green squares represent RNN cells and the blue trapezoids represent neural networks for computing attention weights which will be discussed in more detail below. We will implement each piece of this model graph in a separate function. The whole model will then simply be calling all of these functions in turn. \n", "\n", "\n", "![Figure 1](img/entire_model.png \"Figure 1\")\n", "\n", "This tutorial was created in collaboration with the Tensorflow without a PhD series. 
To check out more episodes, tutorials, and codelabs from this series, please visit: \n", "\n", "https://github.com/GoogleCloudPlatform/tensorflow-without-a-phd\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "tROhMjW49Dsr" }, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "vSgQlcQqbWyb" }, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "\n", "\n", "import pandas as pd\n", "import tensorflow as tf\n", "import numpy as np\n", "import time\n", "import os\n", "from sklearn import metrics\n", "from visualize_attention import attentionDisplay\n", "from process_figshare import download_figshare, process_figshare\n", "\n", "tf.set_random_seed(1234)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "KKwX66FG9G-L" }, "source": [ "## Load & Explore Data" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "4YFtwZsD4J7r" }, "source": [ "Let's begin by downloading the data from [Figshare](https://figshare.com/articles/Wikipedia_Talk_Labels_Toxicity/4563973) and cleaning and splitting it for use in training." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "download_figshare()\n", "process_figshare()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We then load these splits as pandas dataframes." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "aIy4ggIxbWyg" }, "outputs": [], "source": [ "SPLITS = ['train', 'dev', 'test']\n", "\n", "wiki = {}\n", "for split in SPLITS:\n", " wiki[split] = pd.read_csv('data/wiki_%s.csv' % split)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "_eZEM1wd5FiA" }, "source": [ "We display the top few rows of the dataframe to see what we're dealing with. The key columns are 'comment' which contains the text of a comment from a Wikipedia talk page and 'toxicity' which contains the fraction of annotators who found this comment to be toxic. More information about the other fields and how this data was collected can be found on [this wiki](https://meta.wikimedia.org/wiki/Research:Detox/Data_Release) and [research paper](https://arxiv.org/abs/1610.08914).\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 195, "output_extras": [ { "item_id": 1 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 334, "status": "ok", "timestamp": 1519755503377, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "6sj_aimNbWyn", "outputId": "36fccb7e-60a3-4d1c-bbfa-03483ff49f84" }, "outputs": [], "source": [ "wiki['train'].head()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "p0cz2kA_9JxK" }, "source": [ "### Hyperparameters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hyperparameters are used to specify various aspects of our model's architecture. 
In practice, these are often critical to model performance and are carefully tuned using some type of [hyperparameter search](https://en.wikipedia.org/wiki/Hyperparameter_optimization). For this tutorial, we will choose a reasonable set of hyperparameters and treat them as fixed." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "JSvJ3wwwbWys" }, "outputs": [], "source": [ "hparams = {'max_document_length': 60,\n", " 'embedding_size': 50,\n", " 'rnn_cell_size': 128,\n", " 'batch_size': 256,\n", " 'attention_size': 32,\n", " 'attention_depth': 2}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "owTqZg2ebWyv" }, "outputs": [], "source": [ "MAX_LABEL = 2\n", "WORDS_FEATURE = 'words'\n", "NUM_STEPS = 300" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Step 0: Text Preprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Before we can build a neural network on comment strings, we first have to complete a number of preprocessing steps. In particular, it is important that we \"tokenize\" the string, splitting it into an array of tokens. In our case, each token will be a word in our sentence and they will be separated by spaces and punctuation. Many alternative tokenizers exist, some of which use characters as tokens, and others which include punctuation, emojis, or even cleverly handle misspellings. \n", "\n", "Once we've tokenized the sentences, each word will be replaced with an integer representative. This will make the embedding (Step 1) much easier. \n", "\n", "Happily the tensorflow function [VocabularyProcessor](http://tflearn.org/data_utils/#vocabulary-processor) takes care of both the tokenization and integer mapping. 
We only have to give it the max_document_length argument which will determine the length of the output arrays. If sentences are shorter than this length, they will be padded and if they are longer, they will be trimmed. The VocabularyProcessor is then trained on the training set to build the initial vocabulary and map the words to integers." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "9kcrgebgbWzB" }, "outputs": [], "source": [ "# Initialize the vocabulary processor\n", "vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(hparams['max_document_length'])\n", "\n", "def process_inputs(vocab_processor, df, train_label = 'train', test_label = 'test'):\n", " \n", " # For simplicity, we call our features x and our outputs y\n", " x_train = df['train'].comment\n", " y_train = df['train'].is_toxic\n", " x_test = df['test'].comment\n", " y_test = df['test'].is_toxic\n", "\n", " # Train the vocab_processor from the training set\n", " x_train = vocab_processor.fit_transform(x_train)\n", " # Transform our test set with the vocabulary processor\n", " x_test = vocab_processor.transform(x_test)\n", "\n", " # We need these to be np.arrays instead of generators\n", " x_train = np.array(list(x_train))\n", " x_test = np.array(list(x_test))\n", " y_train = np.array(y_train).astype(int)\n", " y_test = np.array(y_test).astype(int)\n", "\n", " n_words = len(vocab_processor.vocabulary_)\n", " print('Total words: %d' % n_words)\n", "\n", " # Return the transformed data and the number of words\n", " return x_train, y_train, x_test, y_test, n_words\n", "\n", "x_train, y_train, x_test, y_test, n_words = process_inputs(vocab_processor, wiki)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "1KtFmLmp9M0t" }, "source": [ "### Step 1: Embed" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "AjtQe9eT9v4v" }, 
"source": [ "Neural networks at their core are a composition of operators from linear algebra and non-linear activation functions. In order to perform these computations on our input sentences, we must first embed them as a vector of numbers. There are two main approaches to perform this embedding:\n", "\n", "\n", "1. **Pre-trained:** It is often beneficial to initialize our embedding matrix using pre-trained embeddings like [Word2Vec](https://en.wikipedia.org/wiki/Word2vec) or [GloVe](https://nlp.stanford.edu/projects/glove/). These embeddings are trained on a huge corpus of text with a general purpose problem so that they incorporate syntactic and semantic properties of the words being embedded and are amenable to transfer learning on new problems. Once initialized, you can optionally train them further for your specific problem by allowing the embedding matrix in the graph to be a trainable variable in our tensorflow graph. \n", "2. **Random:** Alternatively, embeddings can be \"trained from scratch\" by initializing the embedding matrix randomly and then training it like any other parameter in the tensorflow graph.\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "rCubiw6eUVQm" }, "source": [ "In this notebook, we will be using a random initialization. To perform this embedding we use the embed_sequence function from the layers package. This will take our input features, which are the arrays of integers we produced in Step 0, and will randomly initialize a matrix to embed them into. The parameters of this matrix will then be trained with the rest of the graph." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "UG1UXX4L_KQk" }, "outputs": [], "source": [ "def embed(features):\n", " word_vectors = tf.contrib.layers.embed_sequence(\n", " features[WORDS_FEATURE], \n", " vocab_size=n_words, \n", " embed_dim=hparams['embedding_size'])\n", " \n", " return word_vectors" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "nBp5uc-tSee2" }, "source": [ "### Step 2: Encode" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9vjxtIroTBUq" }, "source": [ "A [recurrent neural network](https://en.wikipedia.org/wiki/Recurrent_neural_network) is a deep learning architecture that is useful for encoding sequential information like sentences. They are built around a single cell which contains one of several standard neural network architectures (e.g. simple [RNN](https://en.wikipedia.org/wiki/Recurrent_neural_network), [GRU](https://en.wikipedia.org/wiki/Gated_recurrent_unit), or [LSTM](https://en.wikipedia.org/wiki/Long_short-term_memory)). We will not focus on the details of the architectures, but at each point in time the cell takes in two inputs and produces two outputs. The inputs are the input token for that step in the sequence and some state from the previous steps in the sequence. The outputs produced are the encoded vectors for the current sequence step and a state to pass on to the next step of the sequence. \n", "\n", "Figure 2 shows what this looks like for an unrolled RNN. Each cell (represented by a green square) has two input arrows and two output arrows. Note that all of the green squares represent the same cell and share parameters. 
One major advantage of this cell replication is that, at inference time, it allows us to deal with arbitrary length input and not be restricted by the input sizes of our training set.\n", "\n", "![Figure 2](img/figure_2_v0.png \"Figure 2\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For our model, we will use a bi-directional RNN. This is simply the concatenation of two RNNs, one which processes the sequence from left to right (the \"forward\" RNN) and one which processes it from right to left (the \"backward\" RNN). By using both directions, we get a stronger encoding as each word can be encoded using the context of its neighbors on both sides rather than just a single side. For our cells, we use [gated recurrent units (GRUs)](https://en.wikipedia.org/wiki/Gated_recurrent_unit). Figure 3 gives a visual representation of this.\n", "\n", "![Figure 3](img/figure_3.png \"Figure 3\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "DBDS9LjdUZbV" }, "outputs": [], "source": [ "def encode(word_vectors):\n", " # Create a Gated Recurrent Unit cell with hidden size of RNN_SIZE.\n", " # Since the forward and backward RNNs will have different parameters, we instantiate two seperate GRUS.\n", " rnn_fw_cell = tf.contrib.rnn.GRUCell(hparams['rnn_cell_size'])\n", " rnn_bw_cell = tf.contrib.rnn.GRUCell(hparams['rnn_cell_size'])\n", " \n", " # Create an unrolled Bi-Directional Recurrent Neural Networks to length of\n", " # max_document_length and passes word_list as inputs for each unit.\n", " outputs, _ = tf.nn.bidirectional_dynamic_rnn(rnn_fw_cell, \n", " rnn_bw_cell, \n", " word_vectors, \n", " dtype=tf.float32, \n", " time_major=False)\n", " \n", " return outputs" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "V8hbwTb7dXLV" }, "source": [ "### Step 3: Attend" ] }, { "cell_type": "markdown", "metadata": { "colab_type": 
"text", "id": "PMKkWgSwdZSq" }, "source": [ "There are a number of ways to use the encoded states of a recurrent neural network for prediction. One traditional approach is to simply use the final encoded state of the network, as seen in Figure 2. However, this could lose some useful information encoded in the previous steps of the sequence. In order to keep that information, one could instead use an average of the encoded states outputted by the RNN. There is no reason to believe, though, that all of the encoded states of the RNN are equally valuable. Thus, we arrive at the idea of using a weighted sum of these encoded states to make our prediction.\n", "\n", "We will call the weights of this weighted sum \"attention weights\" as we will see below that they correspond to how important our model thinks each token of the sequence is in making a prediction decision. We compute these attention weights simply by building a small fully connected neural network on top of each encoded state. This network will have a single unit final layer which will correspond to the attention weight we will assign. As for RNNs, the parameters of this network will be the same for each step of the sequence, allowing us to accommodate variable length inputs. Figure 4 shows us what the graph would look like if we applied attention to a uni-directional RNN.\n", "\n", "![Figure 4](img/figure_4.png \"Figure 4\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Again, as our model uses a bi-directional RNN, we first concatenate the hidden states from each RNN before computing the attention weights and applying the weighted sum. Figure 5 below visualizes this step. 
def attend(inputs, attention_size, attention_depth):
  """Collapses an encoded sequence into one vector using learned attention.

  Args:
    inputs: the tensors to concatenate along axis 2 (the forward and backward
      RNN outputs produced by `encode`).
    attention_size: number of units in each hidden layer of the scoring
      network.
    attention_depth: total number of dense layers in the scoring network:
      `attention_depth - 1` ReLU layers followed by one linear single-unit
      layer.

  Returns:
    A tuple `(output, alphas)`: the attention-weighted sum of the encoded
    states over the sequence axis, and the attention weights themselves.
  """
  encoded = tf.concat(inputs, axis=2)

  # The static shape is needed to flatten the sequence and restore it later.
  static_shape = encoded.shape
  num_steps = static_shape[1].value
  encoding_size = static_shape[2].value

  # Flatten batch and sequence together so the same small dense network
  # scores every step of every example.
  scores = tf.reshape(encoded, [-1, encoding_size])
  hidden_layer_count = attention_depth - 1
  for _ in range(hidden_layer_count):
    scores = tf.layers.dense(scores, attention_size, activation=tf.nn.relu)
  scores = tf.layers.dense(scores, 1, activation=None)

  # Normalise the scores over the sequence axis to get the attention weights.
  alphas = tf.nn.softmax(tf.reshape(scores, [-1, num_steps, 1]), dim=1)

  weighted_states = encoded * alphas
  output = tf.reduce_sum(weighted_states, 1)

  return output, alphas
def estimator_spec_for_softmax_classification(
    logits, labels, mode, alphas):
  """Returns EstimatorSpec instance for softmax classification.

  Args:
    logits: unnormalised class scores produced by the final dense layer.
    labels: integer class labels (unused in PREDICT mode).
    mode: one of the tf.estimator.ModeKeys values.
    alphas: attention weights, exported with the predictions so they can be
      visualised.

  Returns:
    A tf.estimator.EstimatorSpec that carries
      * PREDICT: the predicted class, the class probabilities and the
        attention weights;
      * TRAIN: the cross-entropy loss and an Adam training op;
      * otherwise (evaluation): the loss plus accuracy and AUC metrics.
  """
  predicted_classes = tf.argmax(logits, 1)
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={
            'class': predicted_classes,
            'prob': tf.nn.softmax(logits),
            'attention': alphas
        })

  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
  loss = tf.losses.softmax_cross_entropy(
      onehot_labels=onehot_labels, logits=logits)
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss,
                                  global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode,
                                      loss=loss,
                                      train_op=train_op)

  # AUC must be computed from a continuous score: feeding it the arg-max
  # class reduces the ROC curve to a single operating point.
  # NOTE(review): this assumes a binary problem where class index 1 is the
  # positive (toxic) class, i.e. MAX_LABEL == 2 -- confirm.
  positive_class_prob = tf.nn.softmax(logits)[:, 1]
  eval_metric_ops = {
      'accuracy': tf.metrics.accuracy(
          labels=labels, predictions=predicted_classes),
      'auc': tf.metrics.auc(
          labels=labels, predictions=positive_class_prob),
  }
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def predict(encoding, labels, mode, alphas):
  """Adds the output layer and wraps the result in an EstimatorSpec.

  Args:
    encoding: the attention-weighted encoding of each example.
    labels: integer class labels, forwarded to the estimator spec.
    mode: one of the tf.estimator.ModeKeys values.
    alphas: attention weights, forwarded so they are exported at prediction
      time.

  Returns:
    The EstimatorSpec built by estimator_spec_for_softmax_classification.
  """
  # One linear layer maps the encoding to MAX_LABEL unnormalised class scores.
  class_logits = tf.layers.dense(encoding, MAX_LABEL, activation=None)
  spec = estimator_spec_for_softmax_classification(
      logits=class_logits, labels=labels, mode=mode, alphas=alphas)
  return spec


def bi_rnn_model(features, labels, mode):
  """RNN model to predict from sequence of words to a class.

  Chains the four stages of the tutorial: embed -> encode -> attend ->
  predict. This is the model_fn handed to tf.estimator.Estimator.
  """
  encoded_states = encode(embed(features))
  attention_size = hparams['attention_size']
  attention_depth = hparams['attention_depth']
  encoding, alphas = attend(encoded_states, attention_size, attention_depth)
  return predict(encoding, labels, mode, alphas)
# Every run gets its own checkpoint directory, named after the start time.
current_time = str(int(time.time()))
model_dir = os.path.join('checkpoints', current_time)
classifier = tf.estimator.Estimator(model_fn=bi_rnn_model,
                                    model_dir=model_dir)

# Train.
# num_epochs=None repeats the data indefinitely; training length is bounded
# by the `steps` argument passed to classifier.train below.
train_features = {WORDS_FEATURE: x_train}
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=train_features,
    y=y_train,
    batch_size=hparams['batch_size'],
    num_epochs=None,
    shuffle=True)

classifier.train(input_fn=train_input_fn,
                 steps=NUM_STEPS)

# Predict.
# A single unshuffled pass over the test set.
test_features = {WORDS_FEATURE: x_test}
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=test_features,
    y=y_test,
    num_epochs=1,
    shuffle=False)

predictions = classifier.predict(input_fn=test_input_fn)

# `predictions` is a generator of dicts; walk it once and keep both the
# predicted class and the attention weights of every example.
prediction_pairs = [(p['class'], p['attention']) for p in predictions]
y_predicted = [predicted_class for predicted_class, _ in prediction_pairs]
alphas_predicted = [attention for _, attention in prediction_pairs]

# Evaluate with the metrics declared in the estimator spec.
scores = classifier.evaluate(input_fn=test_input_fn)
for label, metric_key in (('Accuracy', 'accuracy'), ('AUC', 'auc')):
  print((label + ': {0:f}').format(scores[metric_key]))
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "display = attentionDisplay(vocab_processor, classifier)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1096, "status": "ok", "timestamp": 1519758417492, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "xSpv2plUV4mN", "outputId": "952a6fc6-bac4-46ab-c354-c54e5d288d75" }, "outputs": [], "source": [ "display.display_prediction_attention(\"Fuck off, you idiot.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1024, "status": "ok", "timestamp": 1519758419192, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "m9bsno-UV4o0", "outputId": "beb38261-3e4e-4348-e62f-d23bac629268" }, "outputs": [], "source": [ "display.display_prediction_attention(\"Thanks for your help editing this.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1223, "status": "ok", "timestamp": 1519758421016, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", 
"userId": "105288052437331023238" }, "user_tz": 210 }, "id": "nB4G8rriV4wt", "outputId": "2b540ca1-a03d-475a-a54a-6c22558e0be3" }, "outputs": [], "source": [ "display.display_prediction_attention(\"You're such an asshole. But thanks anyway.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1067, "status": "ok", "timestamp": 1519758422814, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "2L3TNl-NV4zV", "outputId": "d58ba84a-c30f-4ddb-ecb5-3fc36a850bd5" }, "outputs": [], "source": [ "display.display_prediction_attention(\"I'm going to shoot you!\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1383, "status": "ok", "timestamp": 1519758424819, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "r5BKahjfV41o", "outputId": "05b91277-4d0a-4627-8cb9-c2275a799927" }, "outputs": [], "source": [ "display.display_prediction_attention(\"Oh shoot. 
Well alright.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1154, "status": "ok", "timestamp": 1519758426592, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "8GicGWbCV4uz", "outputId": "f02500eb-35a9-466a-a759-8b83fb05feb3" }, "outputs": [], "source": [ "display.display_prediction_attention(\"First of all who the fuck died and made you the god.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1061, "status": "ok", "timestamp": 1519758428491, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "kWIR-ivlWi18", "outputId": "fb25ede3-e321-4abb-e358-3a0be35266fa" }, "outputs": [], "source": [ "display.display_prediction_attention(\"Gosh darn it!\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1400, "status": "ok", "timestamp": 1519758433415, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "MJhqEbl8WlJm", "outputId": "acf96708-f04a-4493-a650-70ff8f6aa2a7" 
}, "outputs": [], "source": [ "display.display_prediction_attention(\"God damn it!\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "height": 95, "output_extras": [ { "item_id": 1 }, { "item_id": 2 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 1400, "status": "ok", "timestamp": 1519758437722, "user": { "displayName": "Nithum Thain", "photoUrl": "//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg", "userId": "105288052437331023238" }, "user_tz": 210 }, "id": "BDWSuL3kZCT1", "outputId": "795856d9-ab5d-48aa-ceb2-46a654eec60b" }, "outputs": [], "source": [ "display.display_prediction_attention(\"You're not that smart are you?\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "collapsed_sections": [], "default_view": {}, "last_runtime": { "build_target": "//learning/brain/python/client:colab_notebook", "kind": "private" }, "name": "Attention Model Codelab.ipynb", "provenance": [ { "file_id": "1TEez0zxlE23RyPtPVEUaL6zhim-r8gMj", "timestamp": 1518199421351 }, { "file_id": "0By5BN4UDRuWSSHJuR2t2YVIzZjQ", "timestamp": 1509645017645 } ], "version": "0.3.2", "views": {} }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 1 } ================================================ FILE: attention-tutorial/README.md ================================================ # Attention Based Classification Tutorial **Recommended time: 30 minutes** **Contributors: nthain, martin-gorner** This tutorial provides an introduction to building text classification models in 
Tensorflow that use attention to provide insight into how classification decisions are being made. We will build our Tensorflow graph following the Embed - Encode - Attend - Predict paradigm introduced by Matthew Honnibal. For more information about this approach, you can refer to: Slides: https://goo.gl/BYT7au Video: https://youtu.be/pzOzmxCR37I Figure 1 below provides a representation of the full Tensorflow graph we will build in this tutorial. ![Figure 1](img/entire_model.png "Figure 1") This tutorial was created in collaboration with the Tensorflow without a PhD series. To check out more episodes, tutorials, and codelabs from this series, please visit: https://github.com/GoogleCloudPlatform/tensorflow-without-a-phd ## To Run Locally 1. Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for the project (recommended, but technically optional). Python 3: ```shell python3 -m venv env ``` To enter your virtual env: ```shell source env/bin/activate ``` 2. Install library dependencies: ```shell pip install -r requirements.txt ``` ================================================ FILE: attention-tutorial/checkpoints/README.md ================================================ This directory stores model checkpoints during training. ================================================ FILE: attention-tutorial/data/README.md ================================================ A directory to hold our toxicity data. ================================================ FILE: attention-tutorial/process_figshare.py ================================================ """Cleans and splits the toxicity data from Figshare: https://figshare.com/articles/Wikipedia_Talk_Labels_Toxicity/4563973 ------------------------------------------------------------------------ Copyright 2018, Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
DEFAULT_DATA_DIR = 'data/'
FIGSHARE_PATH = 'https://ndownloader.figshare.com/files/'
# Maps each local file name to the Figshare URL it is downloaded from.
FIGSHARE_URL_MAPPING = {
    'toxicity_annotations.tsv': FIGSHARE_PATH + '7394539',
    'toxicity_annotated_comments.tsv': FIGSHARE_PATH + '7394542'
}


def download_figshare(download_data_dir=DEFAULT_DATA_DIR):
  """Downloads the toxicity data from Figshare.

  Files that are already present in `download_data_dir` are not fetched
  again. The directory is created if it does not exist.

  Args:
    * download_data_dir (string): if provided, the directory where the
        Figshare tsvs should be stored
  """
  if download_data_dir:
    os.makedirs(download_data_dir, exist_ok=True)

  already_exist = True
  for file_name, url in FIGSHARE_URL_MAPPING.items():
    target_path = os.path.join(download_data_dir, file_name)
    if os.path.isfile(target_path):
      continue
    already_exist = False
    print('Downloading %s...' % file_name, end='')
    # Fetch under a temporary name and rename once complete, so that an
    # interrupted download is not mistaken for a finished file on the next
    # run.
    partial_path = target_path + '.part'
    urlretrieve(url, partial_path)
    os.replace(partial_path, target_path)
    print('Done!')

  if already_exist:
    print('Figshare data already exists.')


def process_figshare(input_data_dir=DEFAULT_DATA_DIR,
                     output_data_dir=DEFAULT_DATA_DIR):
  """Cleans and splits the toxicity data from Figshare.

  Averages the per-annotation toxicity of every comment, joins the result
  with the comment text, replaces the NEWLINE_TOKEN / TAB_TOKEN markers with
  spaces, labels a comment toxic when its mean toxicity is above 0.5, and
  writes one `wiki_<split>.csv` file per value of the `split` column
  ('train', 'test', 'dev'). Nothing is done when all three output files
  already exist.

  Args:
    * input_data_dir (string): if provided, the directory where the Figshare
        tsvs are stored
    * output_data_dir (string): if provided, the directory where the output
        splits should be written; it is created if missing
  """
  split_names = ['train', 'test', 'dev']
  output_paths = {
      name: os.path.join(output_data_dir, 'wiki_%s.csv' % name)
      for name in split_names
  }
  if all(os.path.isfile(path) for path in output_paths.values()):
    print('Processed files already exist.')
    return

  print('Processing files...', end='')
  # rev_id is read as a string in both tables so the merge keys match exactly.
  toxicity_annotated_comments = pd.read_csv(
      os.path.join(input_data_dir, 'toxicity_annotated_comments.tsv'),
      sep='\t',
      dtype={'rev_id': 'str'})
  toxicity_annotations = pd.read_csv(
      os.path.join(input_data_dir, 'toxicity_annotations.tsv'),
      sep='\t',
      dtype={'rev_id': 'str'})

  # One row per comment, holding the mean of its annotators' toxicity votes.
  annotations_gped = toxicity_annotations.groupby(
      'rev_id', as_index=False).agg({'toxicity': 'mean'})
  all_data = pd.merge(
      annotations_gped, toxicity_annotated_comments, on='rev_id')

  placeholder_tokens = re.compile('NEWLINE_TOKEN|TAB_TOKEN')
  all_data['comment'] = all_data['comment'].apply(
      lambda text: placeholder_tokens.sub(' ', text))
  all_data['is_toxic'] = all_data['toxicity'] > 0.5

  # The output directory may differ from the input one and may not exist yet.
  if output_data_dir:
    os.makedirs(output_data_dir, exist_ok=True)

  # Split into train, test and dev according to the dataset's own column.
  for name, path in output_paths.items():
    all_data[all_data['split'] == name].to_csv(path, index=False)
  print('Done!')
if __name__ == '__main__': process_figshare() ================================================ FILE: attention-tutorial/requirements.txt ================================================ absl-py==0.1.9 appnope==0.1.0 bleach==3.3.0 certifi==2024.7.4 chardet==3.0.4 comet-ml==1.0.8 decorator==4.2.1 entrypoints==0.2.3 enum34==1.1.6 futures==3.1.1 h5py==2.7.1 html5lib==0.999999999 idna==3.7 ipykernel==4.8.2 ipython==8.10.0 ipython-genutils==0.2.0 ipywidgets==7.1.2 jedi==0.11.1 Jinja2==3.1.4 jsonschema==2.6.0 jupyter==1.0.0 jupyter-client==5.2.3 jupyter-console==5.2.0 jupyter-core==4.11.2 kaggle==1.0.5 Keras==2.13.1 Markdown==2.6.11 MarkupSafe==1.0 mistune==2.0.3 nbconvert==6.5.1 nbformat==4.4.0 nltk==3.9 notebook==6.4.12 numpy==1.22.0 pandas==0.22.0 pandocfilters==1.4.2 parso==0.1.1 pexpect==4.4.0 pickleshare==0.7.4 Pillow==10.3.0 prompt-toolkit==1.0.15 protobuf==3.18.3 ptyprocess==0.5.2 Pygments==2.15.0 python-dateutil==2.6.1 pytz==2017.3 PyYAML==5.4 pyzmq==17.0.0 qtconsole==4.3.1 requests==2.32.2 scikit-learn==0.19.1 scipy==1.10.0 Send2Trash==1.5.0 simplegeneric==0.8.1 six==1.11.0 sklearn==0.0 tensorflow==2.12.1 tensorflow-tensorboard==1.5.0 terminado==0.8.1 testpath==0.3.1 tflearn==0.3.2 tornado==6.4.1 traitlets==4.3.2 urllib3==1.26.18 wcwidth==0.1.7 webencodings==0.5.1 websocket-client==0.47.0 Werkzeug==3.0.6 widgetsnbextension==3.1.4 wurlitzer==1.0.1 ================================================ FILE: attention-tutorial/visualize_attention.py ================================================ """A class to help visualize attention weights. ------------------------------------------------------------------------ Copyright 2018, Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
class wordVal(object):
  """A helper class that represents a word and value simultaneously."""

  def __init__(self, word, val):
    self.word = word
    self.val = val

  def __str__(self):
    # Only the word is shown as text; `val` is read by
    # attentionDisplay._color_wordvals to pick the background colour.
    return self.word


class attentionDisplay(object):
  """A class to visualize attention weights produced by a classifer on a given string."""

  def __init__(self, vocab_processor, classifier, words_feature='words'):
    """
    Args:
      * vocab_processor: a trained vocabulary processor from
          tf.contrib.learn.preprocessing.VocabularyProcessor
      * classifier: the classifier of class Estimator produced in
          Attention_Model_Codelab.ipynb
      * words_feature (string): if provided, the key for the comments in the
          feed dictionary expected by the classifier
    """
    self.vocab_processor = vocab_processor
    self.classifier = classifier
    self.words_feature = words_feature

  def _rgb_to_hex(self, rgb):
    """Formats an (r, g, b) tuple of ints as a '#rrggbb' colour string."""
    return '#%02x%02x%02x' % rgb

  def _color_wordvals(self, s):
    """Returns the CSS background colour for one wordVal cell.

    A value of 0 maps to white and a value of 1 to pure red.

    Args:
      * s: a wordVal; `s.val` may be a plain number or a one-element array
          (the classifier emits attention weights with a trailing axis of
          size 1).
    """
    # Reduce to a scalar explicitly: calling int() directly on a 1-element
    # ndarray is deprecated in recent NumPy releases.
    scaled = np.asarray(s.val * 255).ravel()[0]
    # Clamp so that a value outside [0, 1] cannot yield an invalid hex string.
    r = min(255, max(0, 255 - int(scaled)))
    color = self._rgb_to_hex((255, r, r))
    return 'background-color: %s' % color

  def _predict_sentence(self, input_string):
    """Runs the classifier on one string.

    Returns:
      A tuple `(classes, attentions)` of lists with one entry per prediction
      yielded by the classifier.
    """
    x_test = np.array(list(self.vocab_processor.transform([input_string])))

    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={self.words_feature: x_test}, num_epochs=1, shuffle=False)

    # predict() yields dicts lazily; materialise them once so both fields can
    # be collected.
    predictions = list(self.classifier.predict(input_fn=test_input_fn))
    y_predicted = [p['class'] for p in predictions]
    alphas_predicted = [p['attention'] for p in predictions]
    return y_predicted, alphas_predicted

  def _resize_and_tokenize(self, input_string):
    """Tokenizes the string and pads/truncates it to MAX_DOCUMENT_LENGTH tokens."""
    tokens = list(tokenizer([input_string]))[0]
    padding = [''] * (MAX_DOCUMENT_LENGTH - len(tokens))
    return (tokens + padding)[:MAX_DOCUMENT_LENGTH]

  def display_prediction_attention(self, input_string):
    """Visualizes the attention weights of the initialized classifier on the given string.

    Prints 'Toxic' or 'Not toxic' and returns a pandas Styler with one cell
    per token, shaded according to the token's attention weight.
    """
    pred, attn = self._predict_sentence(input_string)
    print('Toxic' if pred[0] else 'Not toxic')

    tokenized_string = self._resize_and_tokenize(input_string)
    wordvals = [wordVal(w, v) for w, v in zip(tokenized_string, attn[0])]
    word_df = pd.DataFrame(wordvals).transpose()

    styler = word_df.style
    # Styler.applymap was renamed to Styler.map in pandas 2.1; use the new
    # name when it exists and fall back to the old one otherwise.
    apply_elementwise = getattr(styler, 'map', None) or styler.applymap
    return apply_elementwise(self._color_wordvals)
deactivate ``` ### Execution flow #### Splits the data locally We recommend using a small dataset 'train_small.tfrecord'. ```shell NOW=$(date +%Y%m%d%H%M%S) JOB_NAME=data-preparation-$NOW python run_preprocessing_data_split.py \ --job_dir 'local_data' \ --input_data_path 'local_data/train_small.tfrecord' \ --output_folder 'local_data/train_eval_test/' ``` #### Splits the data on the cloud ```shell NOW=$(date +%Y%m%d%H%M%S) JOB_NAME=data-preparation-$NOW python run_preprocessing_data_split.py \ --job_name $JOB_NAME \ --job_dir gs://kaggle-model-experiments/dataflow/$JOB_NAME \ --input_data_path 'gs://kaggle-model-experiments/resources/civil_comments_data/train.tfrecord' \ --output_folder 'gs://kaggle-model-experiments/resources/civil_comments_data/train_eval_test' \ --cloud ``` #### Creates the artificial_bias locally ```shell NOW=$(date +%Y%m%d%H%M%S) JOB_NAME=data-preparation-$NOW python run_preprocessing_artificial_bias.py \ --job_dir 'local_data' \ --input_data_path 'local_data/train_eval_test/train*.tfrecord' \ --output_folder 'local_data/artificial_bias' ``` #### Creates the artificial_bias on the cloud ```shell NOW=$(date +%Y%m%d%H%M%S) JOB_NAME=data-preparation-$NOW python run_preprocessing_artificial_bias.py \ --job_name $JOB_NAME \ --job_dir gs://kaggle-model-experiments/dataflow/$JOB_NAME \ --input_data_path 'gs://kaggle-model-experiments/resources/civil_comments_data/train_eval_test/train*.tfrecord' \ --output_folder gs://kaggle-model-experiments/resources/civil_comments_data/artificial_bias/${USER}/${NOW} \ --cloud ``` ================================================ FILE: data_preparation/config.ini ================================================ [CLOUD] project = wikidetox runner = DataflowRunner max_num_workers = 50 defaultWorkerLogLevel = INFO log_level = ERROR zone = us-east1-b [LOCAL] project = wikidetox runner = DirectRunner defaultWorkerLogLevel=INFO log_level = ERROR ================================================ FILE: 
def get_identity_list():
  """Returns the names of the identity columns, as a new list on every call."""
  gender = ['male', 'female', 'transgender', 'other_gender']
  sexual_orientation = [
      'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual',
      'other_sexual_orientation'
  ]
  religion = [
      'christian', 'jewish', 'muslim', 'hindu', 'buddhist', 'atheist',
      'other_religion'
  ]
  race_or_ethnicity = [
      'black', 'white', 'asian', 'latino', 'other_race_or_ethnicity'
  ]
  disability = [
      'physical_disability', 'intellectual_or_learning_disability',
      'psychiatric_or_mental_illness', 'other_disability'
  ]
  return (gender + sexual_orientation + religion + race_or_ethnicity +
          disability)


def get_civil_comments_spec(include_identity_terms=True):
  """Returns the spec of the civil_comments dataset.

  Args:
    include_identity_terms: when true, every name from get_identity_list() is
      added as a scalar float feature that defaults to -1.0.

  Returns:
    A dict mapping feature names to tf.FixedLenFeature parsing specs.
  """
  string_fields = ('comment_text', 'id')
  float_fields = ('toxicity', 'severe_toxicity', 'obscene', 'sexual_explicit',
                  'identity_attack', 'insult', 'threat')
  int_fields = ('toxicity_annotator_count', 'identity_annotator_count')

  spec = {}
  for field_names, dtype in ((string_fields, tf.string),
                             (float_fields, tf.float32),
                             (int_fields, tf.int64)):
    for field_name in field_names:
      spec[field_name] = tf.FixedLenFeature([], dtype=dtype)

  if include_identity_terms:
    # Identity features carry a default value, unlike the fields above.
    spec.update({
        identity: tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.0)
        for identity in get_identity_list()
    })
  return spec
@beam.ptransform_fn
def Shuffle(examples):  # pylint: disable=invalid-name
  """Shuffles a PCollection by grouping its elements on a random key.

  Every element is paired with a key drawn from `random.random()`, the pairs
  are grouped by key, and the keys are then dropped so that only the original
  elements are emitted.

  Args:
    examples: PCollection of elements to shuffle.

  Returns:
    A PCollection holding the same elements as `examples`.
  """
  return (examples
          | 'PairWithRandom' >> beam.Map(lambda x: (random.random(), x))
          | 'GroupByRandom' >> beam.GroupByKey()
          # Index the (key, values) pair rather than unpacking it in the
          # lambda signature: tuple parameters are a syntax error on Python 3.
          | 'DropRandom' >> beam.FlatMap(lambda key_values: key_values[1]))
def run_artificial_bias(p, train_input_data_path, output_folder,
                        oversample_rate):
  """Builds the pipeline that writes an artificially biased training set.

  The training records are read and decoded, every example selected by
  `_select_male_toxic_example` is emitted `oversample_rate` times, and the
  result is written as TF Records.

  Args:
    p: Beam pipeline for constructing PCollections and applying PTransforms.
    train_input_data_path: Input TF Records, which is typically the training
      dataset. This artificial bias method should not be run on eval/test.
    output_folder: Folder where the biased dataset is written, using
      `constants.TRAIN_ARTIFICIAL_BIAS_PREFIX` as the file prefix.
    oversample_rate: How many times to oversample the targeted class.
  """
  raw_records = (
      p
      | 'ReadExamples' >> beam.io.tfrecordio.ReadFromTFRecord(
          file_pattern=train_input_data_path))

  decoder = tfrecord_utils.DecodeTFRecord(
      feature_spec=get_civil_comments_spec(),
      optional_field_names=get_identity_list())
  train_examples = raw_records | 'DecodeTFRecord' >> beam.ParDo(decoder)

  oversampler = OversampleExample(_select_male_toxic_example, oversample_rate)
  biased_examples = train_examples | 'CreateBias' >> beam.ParDo(oversampler)

  destination = os.path.join(output_folder,
                             constants.TRAIN_ARTIFICIAL_BIAS_PREFIX)
  write_to_tf_records(biased_examples, destination)
class EncodeTFRecord(beam.DoFn):
  """Wrapper around ExampleProtoCoder for encoding optional fields.

  An optional field that is absent from an element is removed from the
  feature spec before encoding, so the coder does not look it up. Coders are
  cached per combination of absent optional fields, so they are built once
  per distinct combination instead of once per element.
  """

  def __init__(self, feature_spec, optional_field_names):
    """Initialises a TF-Record encoder.

    Args:
      feature_spec: Dictionary from feature names to one of `FixedLenFeature`,
        `SparseFeature` or `VarLenFeature`. It contains all the features to
        parse (including optional ones).
      optional_field_names: list of optional fields.
    """
    self._feature_spec = feature_spec
    self._optional_field_names = optional_field_names
    # Maps a tuple of missing optional field names to the coder built for
    # the feature spec without those fields.
    self._coders = {}

  def _get_coder(self, missing_fields):
    """Returns the (cached) coder for elements lacking `missing_fields`."""
    coder = self._coders.get(missing_fields)
    if coder is None:
      element_spec = self._feature_spec.copy()
      for field_name in missing_fields:
        del element_spec[field_name]
      coder = coders.ExampleProtoCoder(Schema(element_spec))
      self._coders[missing_fields] = coder
    return coder

  def process(self, element):
    """Yields `element` serialised by the coder matching its present fields.

    Args:
      element: Dictionary from feature names to values. Optional fields may
        be absent.

    Yields:
      The value returned by `ExampleProtoCoder.encode` for `element`.
    """
    missing_fields = tuple(
        field_name for field_name in self._optional_field_names
        if field_name not in element)
    yield self._get_coder(missing_fields).encode(element)
def _parse_arguments(argv):
  """Parses command line arguments.

  Args:
    argv: Full argument vector including the program name (e.g. `sys.argv`);
      the first entry is skipped.

  Returns:
    The `argparse.Namespace` holding the parsed flags.
  """
  parser = argparse.ArgumentParser(
      description='Runs Preprocessing on Civil comments data.')
  parser.add_argument(
      '--cloud', action='store_true', help='Run preprocessing on the cloud.')
  parser.add_argument('--job_name', required=False, help='Dataflow job name')
  parser.add_argument(
      '--job_dir',
      required=True,
      help='Directory in which to stage code and write temporary outputs')
  parser.add_argument(
      '--output_folder',
      required=True,
      help='Directory where to write train, eval and test data')
  # The value is handed to the read transform as its file pattern, so a
  # missing flag is reported at parse time instead of surfacing as None.
  parser.add_argument(
      '--input_data_path',
      required=True,
      help='Path or file pattern of the input TF Records')
  parser.add_argument(
      '--train_fraction',
      required=False,
      default=0.7,
      type=float,
      help='The fraction of the data to allocate to the training dataset')
  parser.add_argument(
      '--eval_fraction',
      required=False,
      default=0.15,
      type=float,
      help='The fraction of the data to allocate to the eval dataset')
  return parser.parse_args(args=argv[1:])
def main():
  """Assembles the Beam options and runs the train/eval/test split.

  The `CLOUD` or `LOCAL` section of `config.ini` is selected from the
  `--cloud` flag. Cloud runs additionally get the job name, worker limit,
  setup file, staging/temp locations and zone.

  Raises:
    ValueError: If `--cloud` is given without `--job_name`.
  """
  args = _parse_arguments(sys.argv)
  env = 'CLOUD' if args.cloud else 'LOCAL'
  config = _parse_config(env, 'config.ini')

  options = {}
  options['project'] = str(config.get('project'))
  if args.cloud:
    if not args.job_name:
      raise ValueError('Job name must be specified for cloud runs.')
    script_dir = os.path.dirname(__file__)
    options['job_name'] = args.job_name
    options['max_num_workers'] = int(config.get('max_num_workers'))
    options['setup_file'] = os.path.abspath(
        os.path.join(script_dir, 'setup.py'))
    options['staging_location'] = os.path.join(args.job_dir, 'staging')
    options['temp_location'] = os.path.join(args.job_dir, 'tmp')
    options['zone'] = config.get('zone')

  pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
  _set_logging(config.get('log_level'))

  runner = str(config.get('runner'))
  with beam.Pipeline(runner, options=pipeline_options) as pipeline:
    preprocessing.run_data_split(
        pipeline,
        input_data_path=args.input_data_path,
        train_fraction=args.train_fraction,
        eval_fraction=args.eval_fraction,
        output_folder=args.output_folder)
tf_trainer/convai_config.py ================================================ FILE: experiments/README.md ================================================ # Text Classification Framework This directory contains an ML framework for text classification. We illustrate it with toxic (and other attributes) comment classification. The framework is structured as a series of common files and templates to quickly construct models on top of [Keras](https://keras.io/) or the [TensorFlow Estimator API](https://www.tensorflow.org/programmers_guide/estimators). The templates also demonstrate how these models can be trained using [Google ML Engine](https://cloud.google.com/ml-engine/). ## Environment Setup ### Build Tools/Bazel Dependencies Install [Bazel](https://docs.bazel.build/versions/master/install-os-x.html); this is the build tool we use to run tests, etc. ### Python Dependencies Install library dependencies (it is optional, but recommended, to install these in a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html)): ```shell # The python3 way to create and use a virtual environment # (optional, but recommended): python3 -m venv .pyenv source .pyenv/bin/activate # Install dependencies pip install -r requirements.txt # ... do stuff ... # Exit your virtual environment. deactivate ``` ### Cloud and ML Engine configuration 1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/). 2. Log in: ```shell gcloud auth login ``` You will be prompted to visit a page in the browser; follow the login instructions there. Due to [some issues](https://stackoverflow.com/questions/44401088/using-training-tfrecords-that-are-stored-on-google-cloud), also run this command: ```shell gcloud auth application-default login ``` Follow the instructions there as well. 3. Set the project: ```shell gcloud config set project [PROJECT] ``` 4. Verify that the above setup works: ```shell gcloud ml-engine models list ``` You should see some existing models.
Example output: ```shell NAME DEFAULT_VERSION_NAME kaggle_model v_20180627_173451 ... ``` ## Training an Existing Model To train an existing model, execute either command: * `./tf_trainer/MODEL_NAME/run.local.sh` to run training locally, or * `./tf_trainer/MODEL_NAME/run.ml_engine.sh` to run training on [Google ML Engine](https://cloud.google.com/ml-engine/). These scripts assume that you have access to the resources on our cloud projects. If you don't, you can still run the models locally, but will have to modify the data paths in `run.local.sh`. At the moment, we only support reading data in `tf.record` format. See [`tools/convert_csv_to_tfrecord.py`](https://github.com/conversationai/conversationai-models/blob/master/experiments/tools/convert_csv_to_tfrecord.py) for a simple CSV to `tf.record` converter. ## Running a hyper parameter tuning job To run a hyper parameter tuning job on CMLE, execute the following command: * `./tf_trainer/MODEL_NAME/run.hyperparameter.sh`. The hyperparameter configuration (MODEL_NAME/hparam_config.yaml) describes the job configuration, the parameters to tune and their respective range. You can monitor your progress in the CMLE UI. ## Deploying a trained model on CMLE At the end of your training, the model will be saved as a .pb file. Note: this is currently broken for keras models. TODO(fprost): Update this. You can then deploy this model on CMLE by executing the following command: * `./tf_trainer/MODEL_NAME/run.deploy.sh`. The model will be accessible as an API and available for [batch/online predictions](https://cloud.google.com/ml-engine/docs/tensorflow/batch-predict). Further information can be found [here](https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models) about deploying models on CMLE. ## Deploying several models on CMLE for a given training run The argument `n_export` allows you to save several models during your training run (1 model every train_steps/n). 
All of the .pb files will be saved in a subfolder of your MODEL_DIR. There is a convenient utility in model_evaluation to help you deploy all models on CMLE: * `python utils_export/deploy_continous_model.py --parent_dir MODEL_DIR --model_name MODEL_NAME ` ## Evaluate an Existing Model on New Data See `model_evaluation/` for further information. ### Type Checking Check the typings: ```shell mypy --ignore-missing-imports -p tf_trainer ``` It's recommended you use mypy as an additional linter in your editor. ### Testing Run all the tests and see the output streamed: ```shell bazel test --test_output=streamed ... ``` You can also run tests individually, directly with python like so: ```shell python -m tf_trainer.common.tfrecord_input_test python -m tf_trainer.common.base_keras_model_test ``` ### Building a New Model TODO(jjtan) ================================================ FILE: experiments/WORKSPACE ================================================ # Bazel Workspace File. ================================================ FILE: experiments/__init__.py ================================================ ================================================ FILE: experiments/requirements.txt ================================================ absl-py==0.7.0 astor==0.7.1 bert-tensorflow==1.0.1 bleach==3.3.0 certifi==2024.7.4 chardet==3.0.4 gast==0.2.2 gcsfs==0.2.3 grpcio==1.53.2 h5py==2.9.0 html5lib==1.0.1 idna==3.7 jsonlines==1.2.0 Markdown==3.0.1 mypy==0.670 nltk==3.9 numpy==1.22.0 pandas==0.24.1 protobuf==3.18.3 PyYAML==5.4 requests==2.32.2 scipy==1.10.0 sentencepiece==0.1.8 six==1.12.0 tensorboard==1.12.2 tensorflow==2.12.1 tensorflow-hub==0.2.0 termcolor==1.1.0 tf-sentencepiece==0.1.8 typed-ast==1.3.2 urllib3==1.26.19 websocket-client==0.54.0 Werkzeug==3.0.3 wurlitzer==1.0.2 ================================================ FILE: experiments/setup.py ================================================ from setuptools import find_packages from setuptools import setup
REQUIRED_PACKAGES = [ 'nltk>=3.3', 'typed_ast==1.3.2', 'tensorflow-hub==0.1.1', 'bert-tensorflow==1.0.1' ] setup( name='tf_trainer', version='0.1', install_requires=REQUIRED_PACKAGES, packages=find_packages(), include_package_data=True, description='TF Estimator modelling framework.') ================================================ FILE: experiments/testdata/BUILD ================================================ exports_files([ "cats_and_dogs_onehot.vocab.txt", "cats_and_dogs_with_cat_opt_int_labels.jsonl", "cats_and_dogs_with_partial_cat_int_labels.jsonl", "cats_and_dogs.jsonl", ]) ================================================ FILE: experiments/testdata/cats_and_dogs.jsonl ================================================ { "text": "cats good", "bad": 0.0 } { "text": "cats bad", "bad": 1.0 } { "text": "dogs good", "bad": 0.0 } { "text": "dogs bad", "bad": 1.0 } { "text": "good cats", "bad": 0.0 } { "text": "dogs and cats", "bad": 0.0 } { "text": "not bad dogs and cats", "bad": 0.0 } { "text": "not bad dogs", "bad": 0.0 } { "text": "bad dogs and cats", "bad": 1.0 } { "text": "bad dogs and bad cats", "bad": 1.0 } { "text": "dogs and bad cats", "bad": 1.0 } { "text": "dogs and not bad cats", "bad": 0.0 } { "text": "dogs and cats bad", "bad": 1.0 } { "text": "dogs and cats good", "bad": 1.0 } { "text": "not dogs and bad cats", "bad": 1.0 } { "text": "not dogs and not cats", "bad": 0.0 } ================================================ FILE: experiments/testdata/cats_and_dogs_onehot.vocab.txt ================================================ dogs 1.0 0.0 0.0 0.0 0.0 0.0 cats 0.0 1.0 0.0 0.0 0.0 0.0 good 0.0 0.0 1.0 0.0 0.0 0.0 bad 0.0 0.0 0.0 1.0 0.0 0.0 and 0.0 0.0 0.0 0.0 1.0 0.0 not 0.0 0.0 0.0 0.0 0.0 1.0 ================================================ FILE: experiments/testdata/cats_and_dogs_with_cat_opt_int_labels.jsonl ================================================ { "text": "cats good", "bad": 0.0, "cat": 1 } { "text": "cats bad", "bad": 1.0, "cat": 1 } { 
"text": "dogs good", "bad": 0.0 } { "text": "dogs bad", "bad": 1.0 } { "text": "good cats", "bad": 0.0, "cat": 1 } { "text": "dogs and cats", "bad": 0.0, "cat": 1 } { "text": "not bad dogs and cats", "bad": 0.0, "cat": 1 } { "text": "not bad dogs", "bad": 0.0 } { "text": "bad dogs and cats", "bad": 1.0, "cat": 1 } { "text": "bad dogs and bad cats", "bad": 1.0, "cat": 1 } { "text": "dogs and bad cats", "bad": 1.0, "cat": 1 } { "text": "dogs and not bad cats", "bad": 0.0, "cat": 1 } { "text": "dogs and cats bad", "bad": 1.0, "cat": 1 } { "text": "dogs and cats good", "bad": 1.0, "cat": 1 } { "text": "not dogs and bad cats", "bad": 1.0, "cat": 1 } { "text": "not dogs and not cats", "bad": 0.0, "cat": 1 } ================================================ FILE: experiments/testdata/cats_and_dogs_with_partial_cat_int_labels.jsonl ================================================ { "text": "cats good", "bad": 0.0, "cat": 1 } { "text": "cats bad", "bad": 1.0, "cat": 1 } { "text": "dogs good", "bad": 0.0, "cat": 0 } { "text": "dogs bad", "bad": 1.0, "cat": 0 } { "text": "good cats", "bad": 0.0, "cat": 1 } { "text": "dogs and cats", "bad": 0.0, "cat": 1 } { "text": "not bad dogs and cats", "bad": 0.0, "cat": 1 } { "text": "not bad dogs", "bad": 0.0, "cat": 0 } { "text": "bad dogs and cats", "bad": 1.0, "cat": 1 } { "text": "bad dogs and bad cats", "bad": 1.0, "cat": 1 } { "text": "dogs and bad cats", "bad": 1.0, "cat": 1 } { "text": "dogs and not bad cats", "bad": 0.0} { "text": "dogs and cats bad", "bad": 1.0 } { "text": "dogs and cats good", "bad": 1.0 } { "text": "not dogs and bad cats", "bad": 1.0 } { "text": "not dogs and not cats", "bad": 0.0 } ================================================ FILE: experiments/tf_trainer/__init__.py ================================================ ================================================ FILE: experiments/tf_trainer/common/BUILD ================================================ py_library( name = "types", srcs = [ "types.py", ], ) 
py_library( name = "model_trainer", srcs = [ "model_trainer.py", ], deps = [ ":base_model", ":data_input", ":text_preprocessor", ":types", ], ) py_library( name = "token_embedding_index", srcs = [ "token_embedding_index.py", ], deps = [ ":base_model", ":types", ], ) py_test( name = "token_embedding_index_test", srcs = ["token_embedding_index_test.py"], data = ["//testdata:cats_and_dogs_onehot.vocab.txt"], deps = [ ":token_embedding_index", ":types", ], ) py_library( name = "text_preprocessor", srcs = [ "text_preprocessor.py", ], deps = [ ":base_model", ":token_embedding_index", ":types", ], ) py_test( name = "text_preprocessor_test", srcs = ["text_preprocessor_test.py"], data = [ "//testdata:cats_and_dogs_onehot.vocab.txt", ], deps = [ ":text_preprocessor", ":types", ], ) py_library( name = "base_model", srcs = [ "base_model.py", ], deps = [":types"], ) py_library( name = "data_input", srcs = [ "dataset_input.py", "tfrecord_input.py", ":base_model", ], deps = [":types"], ) py_test( name = "tfrecord_input_test", srcs = ["tfrecord_input_test.py"], deps = [ ":data_input", ":types", ], ) py_library( name = "cnn_spec_parser", srcs = ["cnn_spec_parser.py"], deps = [":types"], ) py_test( name = "cnn_spec_parser_test", srcs = ["cnn_spec_parser_test.py"], deps = [ ":cnn_spec_parser", ":types", ], ) py_library( name = "episodic_tfrecord_input", srcs = ["episodic_tfrecord_input.py"], deps = [ ":types", ":base_model", ":data_input", ], ) ================================================ FILE: experiments/tf_trainer/common/__init__.py ================================================ ================================================ FILE: experiments/tf_trainer/common/base_model.py ================================================ # coding=utf-8 # Copyright 2018 The Conversation-AI.github.io Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
class BaseModel(abc.ABC):
  """Tentative interface for all model classes.

  Although the code doesn't take advantage of this interface yet, all models
  should subclass this one.
  """

  @abc.abstractmethod
  def estimator(self, model_dir: str) -> tf.estimator.Estimator:
    """Returns the estimator of this model for the given `model_dir`."""

  def hparams(self) -> tf.contrib.training.HParams:
    """Returns the hyper-parameters of the model (empty by default)."""
    return tf.contrib.training.HParams()

  def map(self, f: Callable[[tf.estimator.Estimator], tf.estimator.Estimator]
         ) -> 'BaseModel':
    """Returns a model whose estimator is `f` applied to this estimator.

    Allows models to be extended, e.g. by adding preprocessing steps. The
    returned model reports the same hyper-parameters as this one.

    Args:
      f: Function applied to the estimator built by this model.

    Returns:
      A new `BaseModel` delegating to this model.
    """
    wrapped = self

    class Model(BaseModel):

      def estimator(self, model_dir):
        # Delegate to the wrapped model, then let `f` extend the result.
        return f(wrapped.estimator(model_dir))

      def hparams(self):
        return wrapped.hparams()

    return Model()
layers_split_regexp = re.compile(r'\s*:\s*')
filters_split_regexp = re.compile(r'\s*,\s*')
# The groups must be named: Filter.__init__ reads them back as 'size',
# 'stride' and 'num_filters'.
filter_regexp = re.compile(r'\(\s*(?P<size>\d+)\s*/\s*(?P<stride>\d+)\s*'
                           r'\-\>\s*(?P<num_filters>\d+)\s*\)')


class FilterParseError(Exception):
  """Raised when a filter specification string cannot be parsed."""


class Filter(object):
  """A single CNN filter.

  filter = '(size / stride -> num_filters)'
  """

  def __init__(self, str: str) -> None:
    """Parses one filter specification.

    Args:
      str: Text of the form '(size / stride -> num_filters)'.

    Raises:
      FilterParseError: If `str` does not start with a valid filter
        specification.
    """
    m = filter_regexp.match(str)
    if m is None:
      raise FilterParseError('Bad filter definition for: %s' % str)
    self.num_filters = int(m.group('num_filters'))  # type: int
    self.size = int(m.group('size'))  # type: int
    self.stride = int(m.group('stride'))  # type: int

  def __str__(self) -> str:
    return ('(%d / %d -> %d)' % (self.size, self.stride, self.num_filters))


class ConcurrentFilters(object):
  """A set of concurrent CNN filters that make up one layer.

  filters = filter, filters
  """

  def __init__(self, str: str) -> None:
    """Parses a comma-separated list of filter specifications."""
    filter_spec_strs = filters_split_regexp.split(str)
    self.filters = [Filter(s) for s in filter_spec_strs]  # type: List[Filter]

  def __str__(self) -> str:
    return ', '.join([str(f) for f in self.filters])


class SequentialLayers(object):
  """A sequence of CNN layers.

  layers = filters : layers
  """

  def __init__(self, str: str) -> None:
    """Parses a colon-separated list of layer specifications."""
    layer_spec_strs = layers_split_regexp.split(str)
    self.layers = [ConcurrentFilters(s) for s in layer_spec_strs
                  ]  # type: List[ConcurrentFilters]

  def __str__(self) -> str:
    return ' : '.join([str(f) for f in self.layers])
class CnnSpecParserTest(tf.test.TestCase):
  """Tests for the CNN specification parser."""

  def test_SequentialLayers(self):
    """Parses a three-layer spec and checks it round-trips through str()."""
    spec_text = ('(2 / 2 -> 100), (3 / 2 -> 101) : '
                 '(6 / 2 -> 102) : '
                 '(3 / 1 -> 103)')
    parsed = SequentialLayers(spec_text)

    first_layer = parsed.layers[0]
    self.assertEqual(len(first_layer.filters), 2)

    first_filter = first_layer.filters[0]  # type: Filter
    self.assertEqual(first_filter.size, 2)
    self.assertEqual(first_filter.stride, 2)
    self.assertEqual(first_filter.num_filters, 100)

    self.assertEqual(str(parsed), spec_text)
labels="frac_neg" label_dtypes="float" text_feature="comment_text" elif [ "$1" == "many_communities" ]; then train_path="${GCS_RESOURCES}/transfer_learning_data/many_communities/20181105_train.tfrecord" valid_path="${GCS_RESOURCES}/transfer_learning_data/many_communities/20181105_validate.tfrecord" labels="removed" # removed is a boolean variable cast as an int. # 1 means that the comment was removed and 0 means it was not. label_dtypes="int" text_feature="comment_text" elif [ "$1" == "many_communities_40_per_8_shot" ]; then if [ "$2" == "optimistic" ]; then train_path="${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/augmented_train.tfrecord" elif [ "$2" == "pessimistic" ]; then train_path="${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/original_train..tfrecord" else echo "Must provide second positional argument." exit 1 fi valid_path="${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord" # test_path = "${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord" labels="label" # removed is a boolean variable cast as an int. # 1 means that the comment was removed and 0 means it was not. label_dtypes="int" text_feature="text" # used for param tuning train_steps=3000 eval_steps=250 eval_period=200 else echo "First positional arg must be one of civil_comments, toxicity, many_communities." exit 1 fi ================================================ FILE: experiments/tf_trainer/common/dataset_input.py ================================================ # coding=utf-8 # Copyright 2018 The Conversation-AI.github.io Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Abstract Base Class for DatasetInput.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc from tf_trainer.common import types class DatasetInput(abc.ABC): """Abstract Base Class for Dataset Input. Provides the input functions (referred to as input_fn in TF docs) to be used with Tensorflow Estimator's train, evaluate, and predict methods. """ @abc.abstractmethod def train_input_fn(self) -> types.EstimatorInput: pass @abc.abstractmethod def validate_input_fn(self) -> types.EstimatorInput: pass ================================================ FILE: experiments/tf_trainer/common/episodic_tfrecord_input.py ================================================ # coding=utf-8 # Copyright 2018 The Conversation-AI.github.io Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""DatasetInput implementation for episodic data.""" import tensorflow as tf from pathlib import Path import collections import os import random from tf_trainer.common import dataset_input from tf_trainer.common import types from typing import List, Dict, Tuple, Union tf.app.flags.DEFINE_string('train_path', None, 'Path to the training data TFRecord file.') tf.app.flags.DEFINE_string('dev_path', None, 'Path to the training data TFRecord file.') tf.app.flags.DEFINE_string('episode_size', None, 'Path to the training data TFRecord file.') Text = Union[tf.Tensor, str] Label = Union[tf.Tensor, float] TextDomainLabel = collections.namedtuple('TextDomainLabel', ['text', 'domain', 'label']) EpisodeData = collections.namedtuple('EpisodeData', ['texts', 'domains', 'labels']) class EpisodicTFRecordInput(dataset_input.DatasetInput): """Generates episodic data.""" def __init__(self, train_dir, validate_dir) -> None: self.train_dir = train_dir self.validate_dir = validate_dir def train_input_fn(self) -> types.FeatureAndLabelTensors: all_episodes = self._get_randomized_episodes(self.train_dir) all_texts = [ep.texts for ep in all_episodes] all_domains = [ep.domains for ep in all_episodes] all_labels = [ep.labels for ep in all_episodes] ds = tf.data.Dataset.from_tensor_slices((all_texts, all_domains, all_labels)) self.episode_batches_itr = ds.make_one_shot_iterator() return self.episode_batches_itr.get_next() def validate_input_fn(self) -> types.FeatureAndLabelTensors: pass def _get_randomized_episodes(self, directory: str) -> List[EpisodeData]: """Retrieves a list of domain specific datasets. Given a directory of TFRecord files, each holding data for a given domain, with file name "[domain].tfrecord", returns an iterator of datasets, each corresponding to the data for a single domain. 
""" tfrecord_files = tf.gfile.Glob(os.path.join(directory, '*.tfrecord')) episodes = [] for file_no, tfrecord_file in enumerate(tfrecord_files): tf.logging.info('PROCESSING FILE {}: {}'.format(file_no, tfrecord_file)) episodes.append(self._dataset_from_tfrecord_file(tfrecord_file)) tf.logging.info('Shuffling episodes') random.shuffle(episodes) # In place shuffle. return episodes def _dataset_from_tfrecord_file(self, tfrecord_file: str) -> EpisodeData: # The domain happens to be the file stem. domain = Path(tfrecord_file).stem def _read_tf_example(record) -> TextDomainLabel: parsed = tf.parse_single_example( record, { 'text': tf.FixedLenFeature([], tf.string), 'label': tf.FixedLenFeature([], tf.int64) }) # type: Dict[str, types.Tensor] return TextDomainLabel( text=parsed['text'], domain=domain, label=parsed['label']) examples = list(tf.python_io.tf_record_iterator(tfrecord_file)) random.shuffle(examples) datapoints = [_read_tf_example(example) for example in examples] return EpisodeData( texts=[dp.text for dp in datapoints], domains=[dp.domain for dp in datapoints], labels=[dp.label for dp in datapoints]) ================================================ FILE: experiments/tf_trainer/common/episodic_tfrecord_input_test.py ================================================ """Tests for episodic_tfrecord_input.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tf_trainer.common import episodic_tfrecord_input class EpisodicTFRecordInputTest(tf.test.TestCase): def test(self): train_dir = 'gs://kaggle-model-experiments/resources/transfer_learning_data/many_communities_pruned_episodes' tf.logging.info('CREATE') e = episodic_tfrecord_input.EpisodicTFRecordInput(train_dir, 'asdf') tf.logging.info('GET DATA') episodic_batch = e.train_input_fn() with tf.Session() as session: tf.logging.info('FIRST BATCH') tf.logging.info(session.run(episodic_batch)) tf.logging.info('SECOND BATCH') 
print(session.run(episodic_batch)) if __name__ == '__main__': tf.logging.set_verbosity(tf.logging.INFO) tf.test.main() ================================================ FILE: experiments/tf_trainer/common/model_trainer.py ================================================ # coding=utf-8 # Copyright 2018 The Conversation-AI.github.io Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """The Model Trainer class. This provides an abstraction of Keras and TF.Estimator, and is intended for use in text classification models (although it may generalize to other kinds of problems). 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import os import os.path import six import tensorflow as tf from tensorflow.python.platform import tf_logging as logging from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator.export.export_output import PredictOutput from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.ops import clip_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.training import optimizer as optimizer_lib from tensorflow.python.lib.io import file_io from tf_trainer.common import base_model from tf_trainer.common import dataset_input as ds FLAGS = tf.app.flags.FLAGS tf.app.flags.DEFINE_string('model_dir', None, "Directory for the Estimator's model directory.") tf.app.flags.DEFINE_string('warm_start_from', None, 'Existing checkpoint from which to start training.') tf.app.flags.DEFINE_bool('enable_profiling', False, 'Enable profiler hook in estimator.') tf.app.flags.DEFINE_integer( 'n_export', -1, 'Number of models to export.' 'If =-1, only the best checkpoint (wrt specified eval metric) is exported.' 'If =1, only the last checkpoint is exported.' 
'If >1, we export `n_export` evenly-spaced checkpoints.') tf.app.flags.DEFINE_string('key_name', 'comment_key', 'Name of a pass-thru integer id for batch scoring.') tf.app.flags.DEFINE_integer('train_steps', 100000, 'The number of steps to train for.') tf.app.flags.DEFINE_integer('eval_period', 1000, 'The number of steps per eval period.') tf.app.flags.DEFINE_integer('eval_steps', None, 'Number of examples to eval for, default all.') tf.app.flags.mark_flag_as_required('model_dir') # Copied from: # https://stackoverflow.com/questions/49846207/tensorflow-estimator-warm-start-from-and-model-dir class InitHook(tf.train.SessionRunHook): """Initializes model from a checkpoint_path Args: checkpoint_dir: full path to dir containing the checkpoint """ def __init__(self, checkpoint_dir): self.model_path = checkpoint_dir self.initialized = False def begin(self): """ Restore parameters if a pre-trained model is available and we haven't trained previously. """ if not self.initialized: #checkpoint = tf.train.latest_checkpoint(self.model_path) all_checkpoints = file_io.get_matching_files(os.path.join( self.model_path, 'model.ckpt-*.index')) if not all_checkpoints: raise ValueError('No checkpoint files found matching %s.' % ( self.model_path + '*')) all_checkpoints = [x.replace('.index', '') for x in all_checkpoints] all_checkpoints = sorted(all_checkpoints, key=lambda x: int(x.split('-')[-1])) checkpoint = all_checkpoints[-1] if checkpoint is None: logging.info('No pre-trained model is available at %s, ' 'training from scratch.' % self.model_path) else: logging.info('Pre-trained model {0} found in {1} - warmstarting.'.format( checkpoint, self.model_path)) tf.train.warm_start(checkpoint) self.initialized = True # This function extends tf.contrib.estimator.forward_features. # As the binary_head has a ClassificationOutput for serving_default, # the check at the end of 'new_model_fn' fails in the initial fn. 
def forward_features(estimator, keys, sparse_default_values=None): """Forward features to predictions dictionary. In some cases, user wants to see some of the features in estimators prediction output. As an example, consider a batch prediction service: The service simply runs inference on the users graph and returns the results. Keys are essential because there is no order guarantee on the outputs so they need to be rejoined to the inputs via keys or transclusion of the inputs in the outputs. Example: ```python def input_fn(): features, labels = ... features['unique_example_id'] = ... features, labels estimator = tf.estimator.LinearClassifier(...) estimator = tf.contrib.estimator.forward_features( estimator, 'unique_example_id') estimator.train(...) assert 'unique_example_id' in estimator.predict(...) ``` Args: estimator: A `tf.estimator.Estimator` object. keys: A `string` sparse_default_values: A dict of `str` keys mapping the name of the sparse features to be converted to dense, to the default value to use. Only sparse features indicated in the dictionary are converted to dense and the provided default value is used. Returns: A new `tf.estimator.Estimator` which forwards features to predictions. Raises: ValueError: * if `keys` is already part of `predictions`. We don't allow override. * if 'keys' does not exist in `features`. TypeError: if `keys` type is not one of `string` or list/tuple of `string`. """ def verify_key_types(keys): # pylint: disable=missing-docstring if keys is None: return keys if isinstance(keys, six.string_types): return [keys] if not isinstance(keys, (list, tuple)): raise TypeError('keys should be either a string or a list of strings. ' 'Given: {}'.format(type(keys))) for key in keys: if not isinstance(key, six.string_types): raise TypeError('All items in the given keys list should be a string. 
' 'There exist an item with type: {}'.format(type(key))) return keys def get_keys(features): if keys is None: return features.keys() return keys def verify_keys_and_predictions(features, predictions): if not isinstance(predictions, dict): raise ValueError( 'Predictions should be a dict to be able to forward features. ' 'Given: {}'.format(type(predictions))) for key in get_keys(features): if key not in features: raise ValueError( 'keys should be exist in features. Key "{}" is not in features ' 'dict. features dict has following keys: {}. Please check ' 'arguments of forward_features.'.format(key, features.keys())) if key in predictions: raise ValueError( 'Cannot forward feature key ({}). Since it does exist in ' 'predictions. Existing prediction keys: {}. Please check arguments ' 'of forward_features.'.format(key, predictions.keys())) keys = verify_key_types(keys) def new_model_fn(features, labels, mode, config): # pylint: disable=missing-docstring spec = estimator.model_fn(features, labels, mode, config) predictions = spec.predictions if predictions is None: return spec verify_keys_and_predictions(features, predictions) for key in get_keys(features): feature = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( features[key]) if sparse_default_values and (key in sparse_default_values): if not isinstance(feature, sparse_tensor_lib.SparseTensor): raise ValueError( 'Feature ({}) is expected to be a `SparseTensor`.'.format(key)) feature = sparse_ops.sparse_tensor_to_dense( feature, default_value=sparse_default_values[key]) if not isinstance(feature, ops.Tensor): raise ValueError( 'Feature ({}) should be a Tensor. Please use `keys` ' 'argument of forward_features to filter unwanted features, or' 'add key to argument `sparse_default_values`.' 
'Type of features[{}] is {}.'.format(key, key, type(feature))) predictions[key] = feature spec = spec._replace(predictions=predictions) if spec.export_outputs: # CHANGES HERE outputs = spec.export_outputs['predict'].outputs outputs[key] = spec.predictions[key] spec.export_outputs['predict'] = tf.estimator.export.PredictOutput( outputs) spec.export_outputs[ 'serving_default'] = tf.estimator.export.PredictOutput(outputs) return spec return estimator_lib.Estimator( model_fn=new_model_fn, model_dir=estimator.model_dir, config=estimator.config) class ModelTrainer(object): """Model Trainer.""" def __init__(self, dataset: ds.DatasetInput, model: base_model.BaseModel, warm_start_from: str = None) -> None: self._dataset = dataset self._model = model self._warm_start_from = warm_start_from self._estimator = model.estimator(self._model_dir()) def train_with_eval(self): """Train with periodic evaluation. """ training_hooks = None if FLAGS.enable_profiling: training_hooks = [ tf.train.ProfilerHook( save_steps=10, output_dir=os.path.join(self._model_dir(), 'profiler')), ] if self._warm_start_from: init_hook = InitHook(checkpoint_dir=self._warm_start_from) if training_hooks: training_hooks.append(init_hook) else: training_hooks = [init_hook] train_spec = tf.estimator.TrainSpec( input_fn=self._dataset.train_input_fn, max_steps=FLAGS.train_steps, hooks=training_hooks) eval_spec = tf.estimator.EvalSpec( input_fn=self._dataset.validate_input_fn, steps=FLAGS.eval_steps, throttle_secs=1) self._estimator._config = self._estimator.config.replace( save_checkpoints_steps=FLAGS.eval_period) if FLAGS.n_export > 1 or FLAGS.n_export == -1: self._estimator._config = self._estimator.config.replace( keep_checkpoint_max=None) tf.estimator.train_and_evaluate(self._estimator, train_spec, eval_spec) def predict_on_dev(self, predict_keys=None): checkpoints, _ = self._get_list_checkpoint(1, self._model_dir(), None, None) return self._estimator.predict(self._dataset.validate_input_fn, 
predict_keys=predict_keys, checkpoint_path=checkpoints[0]) def eval_dir(self): return self._estimator.eval_dir() def _model_dir(self): """Get Model Directory. Used to scope logs to a given trial (when hyper param tuning) so that they don't run over each other. When running locally it will just use the passed in model_dir. """ return os.path.join( FLAGS.model_dir, json.loads(os.environ.get('TF_CONFIG', '{}')).get('task', {}).get( 'trial', '')) def _add_estimator_key(self, estimator, example_key_name): """Adds a forward key to the model_fn of an estimator.""" estimator = forward_features(estimator, example_key_name) return estimator def _get_best_step_from_event_file(self, event_file, metrics_key, is_first_metric_better_fn): """Find, in `event_file`, the step corresponding to the best metric. Args: event_file: The event file where to find the metrics. metrics_key: The metric by which to determine the best checkpoint to save. is_first_metric_better_fn: Comparison function to find best metric. Takes in as arguments two numbers, returns true if first is better than second. Default function says larger is better. Default value works for AUC: higher is better. Returns: Best step (int). """ if not metrics_key: return None best_metric = None best_step = None for e in tf.train.summary_iterator(event_file): for v in e.summary.value: if v.tag == metrics_key: metric = v.simple_value if not best_step or is_first_metric_better_fn(metric, best_metric): best_metric = metric best_step = e.step return best_step def _get_best_checkpoint(self, checkpoints, metrics_key, is_first_metric_better_fn): """Find the best checkpoint, according to `metrics_key`. Args: checkpoints: List of model checkpoints. metrics_key: The metric by which to determine the best checkpoint to save. is_first_metric_better_fn: Comparison function to find best metric. Takes in as arguments two numbers, returns true if first is better than second. Default function says larger is better. 
Default value works for AUC: higher is better. Returns: Best checkpoint path. """ eval_event_dir = self._estimator.eval_dir() event_files = file_io.list_directory(eval_event_dir) if not event_files: raise ValueError('No event files found in directory %s.' % eval_event_dir) if len(event_files) > 1: print('Multiple event files found in dir %s. Using last one.' % eval_event_dir) event_file = os.path.join(eval_event_dir, event_files[-1]) # Use the best step to find the best checkpoint. best_step = self._get_best_step_from_event_file(event_file, metrics_key, is_first_metric_better_fn) # If we couldn't find metrics_key in the event file, try again using loss. if best_step is None: print("Metrics key %s not found in metrics, using 'loss' as metric key." % metrics_key) metrics_key = "loss" # Want the checkpoint with the lowest loss is_first_metric_better_fn = lambda x, y: x < y best_step = self._get_best_step_from_event_file(event_file, metrics_key, is_first_metric_better_fn) if best_step is None: raise ValueError("Couldn't find 'loss' metric in event file %s." % event_file) best_checkpoint_path = None for checkpoint_path in checkpoints: version = int(checkpoint_path.split('-')[-1]) if version == best_step: best_checkpoint_path = checkpoint_path if not best_checkpoint_path: raise ValueError("Couldn't find checkpoint for best_step = %d." % best_step) return best_checkpoint_path def _get_list_checkpoint(self, n_export, model_dir, metrics_key, is_first_metric_better_fn): """Get the checkpoints that we want to export, as well as the ones to clean up. Args: n_export: Number of models to export. model_dir: Directory containing the checkpoints. metrics_key: The metric by which to determine the best checkpoint to save. is_first_metric_better_fn: Comparison function to find best metric. Takes in as arguments two numbers, returns true if first is better than second. Default function says larger is better. Default value works for AUC: higher is better. 
Returns: Tuple of: List of checkpoint paths to export, Set of checkpoint paths to delete. If n_export==1, we take only the last checkpoint. If n_export==-1, we take the best checkpoint, according to `metrics_key` and `is_first_metric_better_fn`. The remaining checkpoints are deleted. Otherwise, we consider the list of steps for each for which we have a checkpoint. Then we choose n_export number of checkpoints such that their steps are as equidistant as possible. """ all_checkpoints = file_io.get_matching_files( os.path.join(model_dir, 'model.ckpt-*.index')) if not all_checkpoints: raise ValueError('No checkpoint files found matching model.ckpt-*.index.') all_checkpoints = [x.replace('.index', '') for x in all_checkpoints] all_checkpoints = sorted(all_checkpoints, key=lambda x: int(x.split('-')[-1])) # Keep track of the checkpoints to export, and the ones to delete. checkpoints_to_export = None checkpoints_to_delete = None if n_export == 1: checkpoints_to_export = [all_checkpoints[-1]] elif n_export == -1: checkpoints_to_export = [self._get_best_checkpoint(all_checkpoints, metrics_key, is_first_metric_better_fn)] elif n_export > 1: # We want to cover a distance of (len(checkpoints) - 1): for 3 points, we have a distance of 2. # with a number of points of (n_export -1): because 1 point is set at the end. step = float(len(all_checkpoints) - 1) / (n_export - 1) if step <= 1: # Fewer checkpoints available than the desired number. return all_checkpoints, None checkpoints_to_export = [ all_checkpoints[int(i * step)] for i in range(n_export - 1) ] checkpoints_to_export.append(all_checkpoints[-1]) if checkpoints_to_export: checkpoints_to_delete = set(all_checkpoints) - set(checkpoints_to_export) return checkpoints_to_export, checkpoints_to_delete def export(self, serving_input_fn, example_key_name=None, metrics_key=None, is_first_metric_better_fn=lambda x, y: x > y, delete_unexported_checkpoints=True): """Export model as a .pb. 
Args: serving_input_fn: An input function for inference graph. example_key_name: Name of the example_key field (string). If None, no example_key will be used. metrics_key: The metric by which to determine the best checkpoint to save. is_first_metric_better_fn: Comparison function to find best metric. Takes in as arguments 3 numbers, returns true if first is better than second. Default function says larger is better. Default value works for AUC: higher is better. delete_unexported_checkpoints: Boolean flag indicating whether or not to delete the checkpoints that aren't exported. If False then all model checkpoints are retained. NOTE: if using a different metrics_key than AUC, make sure `is_first_metric_better_fn` is updated accordingly. Example keys are useful when doing batch predictions. Typically, the predictions are done by a cluster of machines and the order of the results is random. Here, we add a forward feature in the inference graph (https://www.tensorflow.org/api_docs/python/tf/contrib/estimator/forward_features) which will be used as an example unique identifier. In inference, the input example includes an example_key field that is passed along by the estimator and returned in the predictions. """ if FLAGS.n_export == -1: if not is_first_metric_better_fn: raise ValueError('Must provide valid `is_first_metric_better_fn` ' 'when exporting best checkpoint.') if not metrics_key: print('No value provided for `metrics_key`. Using loss.') metrics_key = 'loss' is_first_metric_better_fn = lambda x, y: x < y estimator = self._estimator if example_key_name: estimator = self._add_estimator_key(self._estimator, example_key_name) checkpoints_to_export, checkpoints_to_delete = self._get_list_checkpoint( FLAGS.n_export, self._model_dir(), metrics_key, is_first_metric_better_fn) # Delete the checkpoints we don't want. 
if checkpoints_to_delete and delete_unexported_checkpoints: for ckpt in checkpoints_to_delete: tf.train.remove_checkpoint(ckpt) # Export the desired checkpoints. if checkpoints_to_export: for checkpoint_path in checkpoints_to_export: version = checkpoint_path.split('-')[-1] estimator.export_savedmodel( export_dir_base=os.path.join(self._model_dir(), version), serving_input_receiver_fn=serving_input_fn, checkpoint_path=checkpoint_path) ================================================ FILE: experiments/tf_trainer/common/p100_config.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: CUSTOM masterType: standard_p100 workerType: standard_p100 parameterServerType: large_model workerCount: 1 parameterServerCount: 1 ================================================ FILE: experiments/tf_trainer/common/serving_input.py ================================================ """Serving functions for deployed model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.ops import array_ops FLAGS = tf.app.flags.FLAGS def create_text_serving_input_fn(text_feature_name, example_key_name): def serving_input_fn_tfrecords(): serialized_example = tf.placeholder( shape=[None], dtype=tf.string, name="input_example_tensor") feature_spec = { text_feature_name: tf.FixedLenFeature([], dtype=tf.string), example_key_name: tf.FixedLenFeature([], dtype=tf.int64, default_value=-1) } features = tf.parse_example(serialized_example, feature_spec) return tf.estimator.export.ServingInputReceiver(features, serialized_example) return serving_input_fn_tfrecords def create_serving_input_fn(word_to_idx, unknown_token, text_feature_name, example_key_name): def serving_input_fn_tfrecords(): serialized_example = tf.placeholder( shape=[None], dtype=tf.string, name="input_example_tensor") feature_spec = { text_feature_name: tf.VarLenFeature(dtype=tf.string), 
example_key_name: tf.FixedLenFeature([], dtype=tf.int64, default_value=-1) } features = tf.parse_example(serialized_example, feature_spec) keys = list(word_to_idx.keys()) values = list(word_to_idx.values()) vocabulary_table = tf.contrib.lookup.HashTable( tf.contrib.lookup.KeyValueTensorInitializer( keys, values, key_dtype=tf.string, value_dtype=tf.int64), unknown_token) words_int_sparse = vocabulary_table.lookup(features[text_feature_name]) words_int_dense = tf.sparse_tensor_to_dense( words_int_sparse, default_value=0) features[text_feature_name] = words_int_dense return tf.estimator.export.ServingInputReceiver(features, serialized_example) return serving_input_fn_tfrecords ================================================ FILE: experiments/tf_trainer/common/text_preprocessor.py ================================================ # coding=utf-8 # Copyright 2018 The Conversation-AI.github.io Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Text Preprocessor.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import functools from absl import flags import numpy as np import tensorflow as tf from tf_trainer.common import base_model from tf_trainer.common import types from tf_trainer.common.token_embedding_index import LoadTokenIdxEmbeddings from typing import Callable, Dict, List, Optional, Tuple FLAGS = flags.FLAGS tf.app.flags.DEFINE_bool('is_embedding_trainable', False, 'Enable fine tuning of embeddings.') class TextPreprocessor(object): """Text Preprocessor TensorFlow Estimator Extension. Uses embedding indexes to create tensors that map tokens (provided by an abstract tokenizer funtion) to embeddings. Note: Due to the lack of text preprocessing functions in tensorflow, we expect that the text is already preprocessed (list of words) in inference. In training, due to the availability of tf.py_func, we can handle the preprocessing. """ def __init__(self, embeddings_path: str) -> None: self._word_to_idx, self._embeddings_matrix, self._unknown_token, self._embedding_size = \ LoadTokenIdxEmbeddings(embeddings_path) # type: Tuple[Dict[str, int], np.ndarray, int, int] def train_preprocess_fn(self, tokenizer: Callable[[str], List[str]], lowercase: Optional[bool] = True ) -> Callable[[types.Tensor], types.Tensor]: def _tokenize(text: bytes) -> np.ndarray: """Converts text to a list of words. Args: text: text to tokenize (string). lowercase: whether to include lowercasing in preprocessing (boolean). tokenizer: Python function to tokenize the text on. Returns: A list of strings (words). """ words = tokenizer(text.decode('utf-8')) if lowercase: words = [w.lower() for w in words] return np.asarray( [self._word_to_idx.get(w, self._unknown_token) for w in words], dtype=np.int64) def _preprocess_fn(text: types.Tensor) -> types.Tensor: """Converts a text into a list of integers. Args: text: a 0-D string Tensor. Returns: A 1-D int64 Tensor. 
def add_embedding_to_model(self, model: base_model.BaseModel,
                           text_feature_name: str) -> base_model.BaseModel:
  """Returns a new BaseModel with an embedding layer prepended.

  Args:
    model: An existing BaseModel instance.
    text_feature_name: The name of the feature containing text.

  Returns:
    The result of `model.map(...)`, where the mapped function is
    `create_estimator_with_embedding` bound to `text_feature_name`.
  """
  return model.map(
      functools.partial(self.create_estimator_with_embedding,
                        text_feature_name))

def create_estimator_with_embedding(
    self, text_feature_name: str,
    estimator: tf.estimator.Estimator) -> tf.estimator.Estimator:
  """Takes an existing estimator and prepends the embedding layers to it.

  The returned estimator looks up the token ids found under
  `text_feature_name` in the embedding variable and hands the wrapped
  model_fn a features dict that contains only that looked-up tensor.

  Args:
    text_feature_name: The name of the feature containing the text.
    estimator: A predefined Estimator that expects embeddings.

  Returns:
    TF Estimator with embedding ops added.

  Note: We need to consider the case of large embeddings (see:
    https://stackoverflow.com/questions/48217599/
    how-to-initialize-embeddings-layer-within-estimator-api/48243086#48243086).
  """
  old_model_fn = estimator.model_fn
  old_config = estimator.config
  old_params = estimator.params

  def add_init_fn_to_estimatorSpec(estimator_spec, init_fn):
    """Add a new init_fn to the scaffold part of estimator spec."""
    old_scaffold = estimator_spec.scaffold

    def new_init_fn(scaffold, sess):
      init_fn(scaffold, sess)
      # Chain to any init_fn the wrapped model installed. The `init_fn`
      # property of tf.train.Scaffold is already bound to its scaffold and
      # accepts only the session, so it must not be passed `scaffold` again.
      previous_init_fn = (
          old_scaffold.init_fn if old_scaffold is not None else None)
      if previous_init_fn:
        previous_init_fn(sess)

    scaffold = tf.train.Scaffold(
        init_fn=new_init_fn, copy_from_scaffold=old_scaffold)
    estimator_spec_with_scaffold = tf.estimator.EstimatorSpec(
        mode=estimator_spec.mode,
        predictions=estimator_spec.predictions,
        loss=estimator_spec.loss,
        train_op=estimator_spec.train_op,
        eval_metric_ops=estimator_spec.eval_metric_ops,
        export_outputs=estimator_spec.export_outputs,
        training_chief_hooks=estimator_spec.training_chief_hooks,
        training_hooks=estimator_spec.training_hooks,
        scaffold=scaffold,
        evaluation_hooks=estimator_spec.evaluation_hooks,
        prediction_hooks=estimator_spec.prediction_hooks)
    return estimator_spec_with_scaffold

  def new_model_fn(features, labels, mode, params, config):
    """model_fn used in defining the new TF Estimator"""

    embeddings, embedding_init_fn = self.word_embeddings(
        trainable=FLAGS.is_embedding_trainable)

    text_feature = features[text_feature_name]
    word_embeddings = tf.nn.embedding_lookup(embeddings, text_feature)
    new_features = {text_feature_name: word_embeddings}

    # Fix dimensions to make Keras model output match label dims.
    if mode != tf.estimator.ModeKeys.PREDICT:
      labels = {k: tf.expand_dims(v, -1) for k, v in labels.items()}

    # TODO: Modify when embeddings are part of the model.
    estimator_spec = old_model_fn(
        new_features, labels, mode=mode, config=config)
    estimator_spec_with_scaffold = add_init_fn_to_estimatorSpec(
        estimator_spec, embedding_init_fn)

    return estimator_spec_with_scaffold

  return tf.estimator.Estimator(
      new_model_fn, config=old_config, params=old_params)

def word_to_idx(self) -> Dict[str, int]:
  """Returns the mapping from word to token index."""
  return self._word_to_idx

def unknown_token(self) -> int:
  """Returns the token index used for out-of-vocabulary words."""
  return self._unknown_token

def word_embeddings(self, trainable) -> tf.Variable:
  """Creates the word embedding variable and its initialization function.

  Args:
    trainable: Whether the created embedding variable is trainable.

  Returns:
    A pair `(embeddings, init_fn)`: the 'embeddings' variable, and a function
    `init_fn(scaffold, sess)` that runs the variable's initializer while
    feeding the loaded embedding matrix as its initial value.
    NOTE(review): the `tf.Variable` return annotation understates this; a
    tuple is what is actually returned.
  """

  embeddings = tf.get_variable(
      'embeddings', self._embeddings_matrix.shape, trainable=trainable)

  def init_fn(scaffold, sess):
    # Feed the matrix at initialization time instead of baking it into the
    # graph as a constant.
    sess.run(embeddings.initializer,
             {embeddings.initial_value: self._embeddings_matrix})

  return embeddings, init_fn
class TextPreprocessorTest(tf.test.TestCase):
  """Checks the token ids produced by TextPreprocessor.train_preprocess_fn."""

  _VOCAB_PATH = 'testdata/cats_and_dogs_onehot.vocab.txt'

  def _token_ids(self, sentence, lowercase):
    """Runs `sentence` through a whitespace tokenizer and returns its ids."""
    processor = text_preprocessor.TextPreprocessor(self._VOCAB_PATH)
    with self.test_session():
      to_ids = processor.train_preprocess_fn(
          tokenizer=lambda text: text.split(' '), lowercase=lowercase)
      return list(to_ids(sentence).eval())

  def test_Tokenize(self):
    # 'rabbits' is not among the expected vocabulary ids, so it maps to 7.
    observed = self._token_ids(
        'dogs good cats bad rabbits not', lowercase=False)
    self.assertEqual(observed, [1, 3, 2, 4, 7, 6])

  def test_Lowercase(self):
    # Mixed-case input must yield the same ids once lowercasing is enabled.
    observed = self._token_ids(
        'Dogs GOOD Cats BAD rabbits not', lowercase=True)
    self.assertEqual(observed, [1, 3, 2, 4, 7, 6])
class TFRecordInput(dataset_input.DatasetInput):
  """Simple no-preprocessing TFRecord based DatasetInput.

  Handles parsing of TF Examples. Regardless of which TF Example feature key
  holds the text (as specified by FLAGS.text_feature), the parsed text is
  emitted under the feature key base_model.TEXT_FEATURE_KEY.
  """

  def __init__(self) -> None:
    """Reads the input configuration from FLAGS.

    Raises:
      ValueError: if FLAGS.label_dtypes names fewer dtypes than there are
        labels, or pairs a label with a dtype that is not in DTYPE_MAPPING.
    """
    self._labels = FLAGS.labels.split(',')
    if FLAGS.label_dtypes:
      self._label_dtypes = FLAGS.label_dtypes.split(',')
    else:
      self._label_dtypes = ['float'] * len(self._labels)
    self._check_label_dtypes()
    self._batch_size = FLAGS.batch_size
    self._num_prefetch = FLAGS.num_prefetch
    self._text_feature = FLAGS.text_feature
    self._round_labels = FLAGS.round_labels

  def _check_label_dtypes(self) -> None:
    """Fails fast when a label has no usable dtype.

    Labels and dtypes are paired positionally with zip(); without this check
    a missing or misspelled dtype only shows up later as a KeyError while the
    dataset graph is being built.
    """
    if len(self._label_dtypes) < len(self._labels):
      raise ValueError(
          'Got %d label dtypes %r for %d labels %r; every label needs a '
          'dtype.' % (len(self._label_dtypes), self._label_dtypes,
                      len(self._labels), self._labels))
    for label, dtype in zip(self._labels, self._label_dtypes):
      if dtype not in DTYPE_MAPPING:
        raise ValueError(
            'Unsupported dtype %r for label %r; expected one of %r.' %
            (dtype, label, sorted(DTYPE_MAPPING)))

  def labels(self) -> List[str]:
    """List of the names of the label features."""
    return self._labels

  def text_feature(self) -> str:
    """Name of the feature containing the input text from examples."""
    return self._text_feature

  def train_input_fn(self) -> tf.data.TFRecordDataset:
    """input_fn for TF Estimators for training set.

    Automatically repeats over input data forever. We define epoch limits in
    the model trainer.

    Raises:
      ValueError: if FLAGS.train_path is not set.
    """
    if not FLAGS.train_path:
      raise ValueError('--train_path must be set to read training data.')
    return self._input_fn_from_file(FLAGS.train_path).repeat()

  def validate_input_fn(self) -> tf.data.TFRecordDataset:
    """input_fn for TF Estimators for validation set.

    Raises:
      ValueError: if FLAGS.validate_path is not set.
    """
    if not FLAGS.validate_path:
      raise ValueError('--validate_path must be set to read validation data.')
    return self._input_fn_from_file(FLAGS.validate_path)

  def _keys_to_features(self):
    """Builds the parsing spec for the text feature and every label.

    Each label gets the default from DTYPE_DEFAULT (a negative value), which
    `_process_labels` later interprets as "label missing".
    """
    keys_to_features = {}
    keys_to_features[self._text_feature] = tf.FixedLenFeature([], tf.string)
    for label, dtype in zip(self._labels, self._label_dtypes):
      keys_to_features[label] = tf.FixedLenFeature([], DTYPE_MAPPING[dtype],
                                                   DTYPE_DEFAULT[dtype])
    return keys_to_features

  def _parsed_dataset_from_file(self, filepath: str) -> tf.data.Dataset:
    """Lists the files matching `filepath` and parses every record in them."""
    filenames_dataset = tf.data.Dataset.list_files(filepath)
    dataset = tf.data.TFRecordDataset(
        filenames_dataset)  # type: tf.data.TFRecordDataset
    return dataset.map(
        self._read_tf_example, num_parallel_calls=multiprocessing.cpu_count())

  def _input_fn_from_file(self, filepath: str) -> tf.data.TFRecordDataset:
    """Returns batched, prefetched (features, labels) read from `filepath`."""
    parsed_dataset = self._parsed_dataset_from_file(filepath)
    return parsed_dataset.batch(self._batch_size).prefetch(self._num_prefetch)

  def _process_labels(self, features, parsed):
    """Applies rounding and computes weights tied to feature presence.

    For all of the expected labels, if the value is negative, this indicates a
    missing feature from the input. A corresponding label name, suffixed by
    '_weight' will be added to the features with a value of 1.0 if present,
    and 0.0 if absent. The label value is rounded up or down (if enabled) and
    then mapped to zero if missing.

    Args:
      features: the input features read from a TF Example.
      parsed: the input labels read from a TF Example.

    Returns:
      A tuple of the features dict (with weights) and the labels dict.
    """
    # Shallow copy so the weights are not added to the caller's dict.
    new_features = dict(features)
    labels = {}
    for label in self._labels:
      label_value = tf.cast(parsed[label], dtype=tf.float32)
      # Missing values are negative, find them and zero those features out.
      weight = tf.cast(tf.greater_equal(label_value, 0.0), dtype=tf.float32)
      if self._round_labels:
        label_value = tf.round(label_value)
      new_features[label + '_weight'] = weight
      labels[label] = tf.multiply(label_value, weight)
    return new_features, labels

  def _read_tf_example(
      self,
      record: tf.Tensor,
  ) -> types.FeatureAndLabelTensors:
    """Parses TF Example protobuf into a text feature and labels.

    The input TF Example has a text feature as a singleton list with the full
    comment as the single element.
    """
    parsed = tf.parse_single_example(
        record, self._keys_to_features())  # type: Dict[str, types.Tensor]

    features = {base_model.TEXT_FEATURE_KEY: parsed[self._text_feature]}
    return self._process_labels(features, parsed)


class TFRecordInputWithTokenizer(TFRecordInput):
  """TFRecord based DatasetInput.

  Handles parsing of TF Examples.

  When handling text input, this class rewrites the text input feature using
  the preprocessing fn. That is, the text is emitted under a new key in the
  output, changing both the type and contents - from a string to a tensor of
  integers representing tokens of some kind.

  TODO: preserve the original string and write a new key.
  """

  def __init__(self,
               train_preprocess_fn: Callable[[str], List[str]],
               max_seq_len: int = 30000) -> None:
    """Stores the tokenizing function and the sequence length cut-off.

    Args:
      train_preprocess_fn: function applied to the parsed text to produce the
        token tensor.
      max_seq_len: examples whose token count is not strictly below this
        value are dropped from the dataset.
    """
    super().__init__()
    self._train_preprocess_fn = train_preprocess_fn
    self._max_seq_len = max_seq_len

  def _input_fn_from_file(self, filepath: str) -> tf.data.Dataset:
    """Returns length-bucketed, padded batches read from `filepath`."""
    parsed_dataset = self._parsed_dataset_from_file(filepath)
    parsed_dataset = parsed_dataset.filter(lambda x, _: tf.less(
        x['sequence_length'], self._max_seq_len))

    # Only the token sequence has a variable length; everything else is a
    # scalar per example.
    feature_shapes = {
        base_model.TOKENS_FEATURE_KEY: [None],
        'sequence_length': []
    }
    for label in self._labels:
      feature_shapes[label + '_weight'] = []
    padded_shapes = (
        feature_shapes,
        {label: [] for label in self._labels})  # type: Tuple[Dict, Dict]

    # Boundaries at 20, 40, ..., 200 tokens; one batch size is needed per
    # bucket, i.e. one more than the number of boundaries.
    bucket_boundaries = list(range(20, 201, 20))
    parsed_dataset = parsed_dataset.apply(
        tf.contrib.data.bucket_by_sequence_length(
            element_length_func=lambda x, _: x['sequence_length'],
            bucket_boundaries=bucket_boundaries,
            bucket_batch_sizes=(
                [self._batch_size] * (len(bucket_boundaries) + 1)),
            padded_shapes=padded_shapes))
    batched_dataset = parsed_dataset.prefetch(self._num_prefetch)

    return batched_dataset

  def _read_tf_example(
      self,
      record: tf.Tensor,
  ) -> types.FeatureAndLabelTensors:
    """Parses TF Example protobuf into token features and labels.

    The input TF Example has a text feature as a singleton list with the full
    comment as the single element. The text is tokenized and emitted under
    base_model.TOKENS_FEATURE_KEY together with its 'sequence_length'.
    """
    parsed = tf.parse_single_example(
        record, self._keys_to_features())  # type: Dict[str, types.Tensor]

    text = parsed[self.text_feature()]
    tokens = self._train_preprocess_fn(text)
    features = {
        base_model.TOKENS_FEATURE_KEY: tokens,
        'sequence_length': tf.shape(tokens)[0],
    }
    return self._process_labels(features, parsed)
class TFRecordInputTest(tf.test.TestCase):
  """Tests parsing of a single TF Example by TFRecordInput."""

  def setUp(self):
    FLAGS.text_feature = 'comment'
    # FLAGS is process-global: clear the dtypes a previous test may have set
    # so every test starts from the "all labels are floats" default.
    FLAGS.label_dtypes = None
    ex = tf.train.Example(
        features=tf.train.Features(
            feature={
                'label':
                    tf.train.Feature(
                        float_list=tf.train.FloatList(value=[0.8])),
                'ignored-label':
                    tf.train.Feature(
                        float_list=tf.train.FloatList(value=[0.125])),
                'int_label':
                    tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[0])),
                'comment':
                    tf.train.Feature(
                        bytes_list=tf.train.BytesList(
                            value=['Hi there Bob'.encode('utf-8')]))
            }))
    self.ex_tensor = tf.convert_to_tensor(
        ex.SerializeToString(), dtype=tf.string)

  def test_TFRecordInput_unrounded(self):
    FLAGS.round_labels = False
    FLAGS.labels = 'label'
    dataset_input = tfrecord_input.TFRecordInput()

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),
                       b'Hi there Bob')
      np.testing.assert_almost_equal(labels['label'].eval(), 0.8)
      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)
      self.assertCountEqual(list(labels), ['label'])
      self.assertCountEqual(list(features), ['text', 'label_weight'])

  def test_TFRecordInput_default_values(self):
    FLAGS.labels = 'label,fake_label,int_label'
    FLAGS.label_dtypes = 'float,float,int'
    FLAGS.round_labels = False
    dataset_input = tfrecord_input.TFRecordInput()

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),
                       b'Hi there Bob')
      np.testing.assert_almost_equal(labels['label'].eval(), 0.8)
      np.testing.assert_almost_equal(labels['int_label'].eval(), 0.0)
      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)
      # 'fake_label' is absent from the example: zero label, zero weight.
      np.testing.assert_almost_equal(labels['fake_label'].eval(), 0.0)
      np.testing.assert_almost_equal(features['fake_label_weight'].eval(),
                                     0.0)

  def test_TFRecordInput_rounded(self):
    FLAGS.labels = 'label'
    FLAGS.round_labels = True
    dataset_input = tfrecord_input.TFRecordInput()

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)
      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),
                       b'Hi there Bob')
      np.testing.assert_almost_equal(labels['label'].eval(), 1.0)
      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)


class TFRecordInputWithTokenizerTest(tf.test.TestCase):
  """Tests parsing of a single TF Example by TFRecordInputWithTokenizer."""

  def setUp(self):
    FLAGS.text_feature = 'comment'
    # FLAGS is process-global: clear the dtypes a previous test may have set
    # so every test starts from the "all labels are floats" default.
    FLAGS.label_dtypes = None
    ex = tf.train.Example(
        features=tf.train.Features(
            feature={
                'label':
                    tf.train.Feature(
                        float_list=tf.train.FloatList(value=[0.8])),
                'int_label':
                    tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[0])),
                'comment':
                    tf.train.Feature(
                        bytes_list=tf.train.BytesList(
                            value=['Hi there Bob'.encode('utf-8')]))
            }))
    self.ex_tensor = tf.convert_to_tensor(
        ex.SerializeToString(), dtype=tf.string)

    self.word_to_idx = {'Hi': 12, 'there': 13}
    self.unknown_token = 999

  def preprocessor(self, text):
    """Maps whitespace-separated words to ids, unknown words to 999."""
    return tf.py_func(
        lambda t: np.asarray([
            self.word_to_idx.get(x, self.unknown_token)
            for x in t.decode('utf-8').split(' ')
        ]), [text], tf.int64)

  def test_TFRecordInputWithTokenizer_unrounded(self):
    FLAGS.labels = 'label,fake_label,int_label,fake_int_label'
    FLAGS.label_dtypes = 'float,float,int,int'
    FLAGS.round_labels = False
    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=self.preprocessor)

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)

      self.assertEqual(
          list(features[base_model.TOKENS_FEATURE_KEY].eval()),
          [12, 13, 999])
      self.assertAlmostEqual(labels['label'].eval(), 0.8)
      self.assertAlmostEqual(labels['fake_label'].eval(), 0.0)
      self.assertAlmostEqual(labels['int_label'].eval(), 0.0)
      self.assertAlmostEqual(labels['fake_int_label'].eval(), 0.0)
      self.assertAlmostEqual(features['label_weight'].eval(), 1.0)
      self.assertAlmostEqual(features['fake_label_weight'].eval(), 0.0)
      self.assertAlmostEqual(features['int_label_weight'].eval(), 1.0)
      self.assertAlmostEqual(features['fake_int_label_weight'].eval(), 0.0)

  def test_TFRecordInputWithTokenizer_default_values(self):
    FLAGS.labels = 'label,fake_label'
    FLAGS.round_labels = False
    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=self.preprocessor)

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)

      self.assertEqual(
          list(features[base_model.TOKENS_FEATURE_KEY].eval()),
          [12, 13, 999])
      self.assertAlmostEqual(labels['label'].eval(), 0.8)
      self.assertAlmostEqual(labels['fake_label'].eval(), 0.0)
      self.assertAlmostEqual(features['label_weight'].eval(), 1.0)
      self.assertAlmostEqual(features['fake_label_weight'].eval(), 0.0)

  def test_TFRecordInputWithTokenizer_rounded(self):
    FLAGS.labels = 'label'
    FLAGS.round_labels = True
    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(
        train_preprocess_fn=self.preprocessor)

    with self.test_session():
      features, labels = dataset_input._read_tf_example(self.ex_tensor)

      self.assertEqual(
          list(features[base_model.TOKENS_FEATURE_KEY].eval()),
          [12, 13, 999])
      self.assertEqual(labels['label'].eval(), 1.0)
      self.assertEqual(features['label_weight'].eval(), 1.0)
def LoadTokenIdxEmbeddings(embeddings_path: str) \
  -> Tuple[Dict[str, int], np.ndarray, int, int]:
  """Generate word to idx mapping and word embeddings numpy array.

  We have two levels of indirection (e.g. word to idx and then idx to
  embedding) which could reduce embedding size if multiple words map to the
  same idx; although this is not currently a real or useful use-case.

  Blank lines in the file are ignored.

  Args:
    embeddings_path: Local, GCS, or HDFS path to embedding file. Each line
      should be a word and its vector representation separated by a space.

  Returns:
    Tuple of:
      A vocabulary dictionary (mapping words to their index, starting at 1)
      A Numpy array of word embeddings with shape
        (vocab size + 2, embedding size): row 0 is the padding row and the
        last row is the unknown-token row
      A unique unknown token index (greater than all other token indexes)
      The size of the embeddings for words that is being used

  Raises:
    ValueError: if the file does not exist, contains no embeddings, or
      contains vectors of differing sizes.
  """
  if not tf.gfile.Exists(embeddings_path):
    raise ValueError('File at %s does not exist.' % embeddings_path)

  word_to_idx = {}  # type: Dict[str, int]
  word_embeddings = []  # type: List[np.ndarray]
  embedding_size = None
  with tf.gfile.Open(embeddings_path) as f:
    for line_number, line in enumerate(f, start=1):
      values = line.split()
      if not values:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        continue
      word_embedding = np.asarray(values[1:], dtype='float32')
      if embedding_size is None:
        embedding_size = len(word_embedding)
      elif len(word_embedding) != embedding_size:
        raise ValueError(
            'Line %d of %s has an embedding of size %d, expected %d.' %
            (line_number, embeddings_path, len(word_embedding),
             embedding_size))
      word_embeddings.append(word_embedding)
      # Index by the number of vectors kept so far, not by the line number:
      # this keeps words aligned with their rows when lines are skipped, and
      # leaves index 0 free for the padding row inserted below.
      word_to_idx[values[0]] = len(word_embeddings)

  if not word_embeddings:
    raise ValueError('No embeddings loaded from %s.' % embeddings_path)

  # Add the padding "embedding"
  word_embeddings.insert(0, np.random.randn(embedding_size))

  # Convert embedding to numpy array and append the unknown word embedding,
  # which is the mean of all rows collected above (padding row included).
  unknown_token = len(word_embeddings)
  embeddings_matrix = np.asarray(word_embeddings, dtype=np.float32)
  embeddings_matrix = np.append(
      embeddings_matrix, [embeddings_matrix.mean(axis=0)], axis=0)
  return word_to_idx, embeddings_matrix, unknown_token, embedding_size
class LoadTokenIdxEmbeddingsTest(tf.test.TestCase):
  """Checks the values LoadTokenIdxEmbeddings returns for the test vocab."""

  def test_LoadTokenIdxEmbeddings(self):
    loaded = LoadTokenIdxEmbeddings('testdata/cats_and_dogs_onehot.vocab.txt')
    vocab, matrix, unknown_index, vector_size = loaded

    self.assertEqual(vector_size, 6)
    self.assertEqual(unknown_index, 7)

    for word, expected_index in (('dogs', 1), ('cats', 2), ('not', 6)):
      self.assertEqual(vocab[word], expected_index)

    # Only the first two components of row 1 are pinned down here.
    first_word_row = matrix[1]
    self.assertEqual(first_word_row[0], 1.0)
    self.assertEqual(first_word_row[1], 0.0)
    # Note: the padding embedding (index 0) is random, and so is the unknown
    # token embedding at index n+1 (7 in this case); neither is asserted on.
Tensor = Union[tf.Tensor, tf.SparseTensor] TensorDict = Dict[str, Tensor] TensorOrTensorDict = Union[tf.Tensor, TensorDict] FeatureAndLabelTensors = Tuple[TensorOrTensorDict, TensorOrTensorDict] # See: https://www.tensorflow.org/api_docs/python/tf/estimator/TrainSpec EstimatorInput = Union[FeatureAndLabelTensors, tf.data.Dataset] ================================================ FILE: experiments/tf_trainer/common/v100_config.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: CUSTOM masterType: standard_v100 ================================================ FILE: experiments/tf_trainer/tf_char_cnn/__init__.py ================================================ ================================================ FILE: experiments/tf_trainer/tf_char_cnn/hparam_config.yaml ================================================ trainingInput: pythonVersion: '3.5' # scaleTier: CUSTOM # masterType: standard # workerType: standard_gpu # parameterServerType: large_model # workerCount: 1 # parameterServerCount: 1 scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/toxicity # TODO: change based on dataset maxTrials: 120 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 1 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: INTEGER minValue: 16 maxValue: 256 scaleType: UNIT_LOG_SCALE - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 32 - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: 
experiments/tf_trainer/tf_char_cnn/hparam_config_civil_comments.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/toxicity maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 64 - 128 - 256 - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: experiments/tf_trainer/tf_char_cnn/hparam_config_many_communities.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/removed maxTrials: 150 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 64 - 128 - 256 - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ 
FILE: experiments/tf_trainer/tf_char_cnn/hparam_config_toxicity.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/frac_neg maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 64 - 128 - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: experiments/tf_trainer/tf_char_cnn/model.py ================================================ """Tensorflow Estimator Character CNN.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.keras import layers from tf_trainer.common import base_model from typing import Set FLAGS = tf.app.flags.FLAGS # Hyperparameters # TODO: Add validation tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'The learning rate to use during training.') tf.app.flags.DEFINE_float('dropout_rate', 0.25, 'The dropout rate to use during training.') # This would normally just be a multi_integer, but we use string due to # constraints with ML Engine hyperparameter tuning. # TODO: add link to relevant public issue/bug/documentation? 
class TFCharCNNModel(base_model.BaseModel):
  """TF Character CNN Model

  TF implementation of a Character CNN. Inputs should be strings.
  """

  def __init__(self, target_labels: Set[str]) -> None:
    """Stores the label names; one binary classification head is built each.

    Args:
      target_labels: names of the labels to predict.
    """
    self._target_labels = target_labels

  @staticmethod
  def hparams():
    """Builds the HParams for the model from the command line FLAGS.

    The comma delimited `filter_sizes` and `dense_units` flags are parsed
    into lists of ints.
    """
    filter_sizes = [int(units) for units in FLAGS.filter_sizes.split(',')]
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        dropout_rate=FLAGS.dropout_rate,
        filter_sizes=filter_sizes,
        num_filters=FLAGS.num_filters,
        dense_units=dense_units,
        embedding_size=FLAGS.embedding_size,
        pooling_type=FLAGS.pooling_type,
        string_len=FLAGS.string_len)
    return hparams

  def estimator(self, model_dir):
    """Returns an Estimator for this model that writes to `model_dir`."""
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """model_fn: byte embedding -> Conv1D stack -> pooling -> dense heads.

    Args:
      features: dict holding the raw strings under
        base_model.TEXT_FEATURE_KEY.
      labels: labels handed through to the multi-head.
      mode: a tf.estimator.ModeKeys value; dropout is active only in TRAIN.
      params: the HParams produced by `hparams()`.
      config: the RunConfig (unused).

    Returns:
      The EstimatorSpec created by the multi-head.

    Raises:
      ValueError: if params.pooling_type is neither 'average' nor 'max'.
    """
    # One embedding row per possible byte value.
    embedding = tf.Variable(
        tf.truncated_normal([256, params.embedding_size]),
        name='char_embedding')

    texts = features[base_model.TEXT_FEATURE_KEY]
    batch_size = tf.shape(texts)[0]
    # Split every string into single characters, pad the ragged result with
    # '\0' and reinterpret the bytes as integer ids.
    byte_ids = tf.reshape(
        tf.cast(
            tf.decode_raw(
                tf.sparse_tensor_to_dense(
                    tf.string_split(texts, ''), default_value='\0'),
                tf.uint8), tf.int32), [batch_size, -1])
    # Append string_len zeros and slice back to string_len columns, so every
    # row is truncated or zero-padded to exactly string_len ids.
    padded_ids = tf.slice(
        tf.concat(
            [byte_ids,
             tf.zeros([batch_size, params.string_len], tf.int32)],
            axis=1), [0, 0], [batch_size, params.string_len])

    inputs = tf.nn.embedding_lookup(params=embedding, ids=padded_ids)

    # Conv
    X = inputs
    for filter_size in params.filter_sizes:
      X = layers.Conv1D(
          params.num_filters,
          filter_size,
          activation='relu',
          padding='same')(
              X)
    if params.pooling_type == 'average':
      X = layers.GlobalAveragePooling1D()(X)
    elif params.pooling_type == 'max':
      X = layers.GlobalMaxPooling1D()(X)
    else:
      raise ValueError('Unrecognized pooling type parameter')

    # FC
    # tf.layers.dropout defaults to training=False, which makes it a no-op;
    # it has to be switched on explicitly while training.
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    logits = X
    for num_units in params.dense_units:
      logits = tf.layers.dense(
          inputs=logits, units=num_units, activation=tf.nn.relu)
      logits = tf.layers.dropout(
          logits, rate=params.dropout_rate, training=is_training)

    logits = tf.layers.dense(
        inputs=logits, units=len(self._target_labels), activation=None)

    output_heads = [
        tf.contrib.estimator.binary_classification_head(name=name)
        for name in self._target_labels
    ]
    multihead = tf.contrib.estimator.multi_head(output_heads)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return multihead.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)
# Will raise an error if the model already exists. gcloud ml-engine models create $MODEL_NAME \ --regions us-central1 # Deploy a model version. MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S") gcloud ml-engine versions create $MODEL_VERSION \ --model $MODEL_NAME \ --origin $MODEL_SAVED_PATH \ --runtime-version 1.10 ================================================ FILE: experiments/tf_trainer/tf_char_cnn/run.hyperparameter.sh ================================================ #!/bin/bash source "tf_trainer/common/dataset_config.sh" DATETIME=$(date '+%Y%m%d_%H%M%S') MODEL_NAME="tf_char_cnn" MODEL_NAME_DATA=${MODEL_NAME}_$1 JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}" gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \ --job-dir=${JOB_DIR} \ --runtime-version=1.10 \ --module-name="tf_trainer.${MODEL_NAME}.run" \ --package-path=tf_trainer \ --region=us-east1 \ --verbosity=debug \ --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \ -- \ --train_path=$train_path \ --validate_path=$valid_path \ --embedding_size=300 \ --model_dir="${JOB_DIR}/model_dir" \ --is_embedding_trainable=False \ --train_steps=$train_steps \ --eval_period=$eval_period \ --eval_steps=$eval_steps \ --labels=$labels \ --label_dtypes=$label_dtypes \ --preprocess_in_tf=False echo "Model dir:" echo ${JOB_DIR}/model_dir ================================================ FILE: experiments/tf_trainer/tf_char_cnn/run.local.sh ================================================ #!/bin/bash source "tf_trainer/common/dataset_config.sh" python -m tf_trainer.tf_char_cnn.run \ --train_path=$train_path \ --validate_path=$valid_path \ --model_dir="tf_char_cnn_local_model_dir" \ --labels=$labels \ --label_dtypes=$label_dtypes ================================================ FILE: experiments/tf_trainer/tf_char_cnn/run.ml_engine.sh ================================================ #!/bin/bash source "tf_trainer/common/dataset_config.sh" DATETIME=$(date 
def main(argv):
  """Trains the character CNN with periodic evaluation, then exports it.

  Args:
    argv: Positional command-line arguments; ignored.
  """
  del argv  # unused

  # Input pipeline and model; the model predicts the dataset's labels.
  record_input = tfrecord_input.TFRecordInput()
  char_cnn = tf_char_cnn.TFCharCNNModel(record_input.labels())

  runner = model_trainer.ModelTrainer(record_input, char_cnn)
  runner.train_with_eval()

  export_input_fn = serving_input.create_text_serving_input_fn(
      text_feature_name=base_model.TEXT_FEATURE_KEY,
      example_key_name=base_model.EXAMPLE_KEY)

  # The export metric is the AUC of the first label named in --labels.
  first_label = FLAGS.labels.split(',')[0]
  export_metric = "auc/%s" % first_label
  runner.export(
      export_input_fn, base_model.EXAMPLE_KEY, metrics_key=export_metric)
================================================ FILE: experiments/tf_trainer/tf_cnn/__init__.py ================================================ ================================================ FILE: experiments/tf_trainer/tf_cnn/finetune.py ================================================ """Experiments with many_communities dataset.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import nltk import os import pandas as pd import tensorflow as tf from tf_trainer.common import base_model from tf_trainer.common import model_trainer from tf_trainer.common import serving_input from tf_trainer.common import text_preprocessor from tf_trainer.common import tfrecord_input from tf_trainer.common import types from tf_trainer.tf_cnn import model as tf_cnn from tensorflow.python.lib.io import file_io FLAGS = tf.app.flags.FLAGS tf.app.flags.DEFINE_string("embeddings_path", "local_data/glove.6B/glove.6B.100d.txt", "Path to the embeddings file.") tf.app.flags.DEFINE_string("tmp_results_path", None, "Path to the local combined (across communities) results file.") tf.app.flags.mark_flag_as_required("warm_start_from") tf.app.flags.mark_flag_as_required("tmp_results_path") def main(argv): del argv # unused embeddings_path = FLAGS.embeddings_path preprocessor = text_preprocessor.TextPreprocessor(embeddings_path) nltk.download("punkt") train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize) dataset = tfrecord_input.TFRecordInputWithTokenizer( train_preprocess_fn=train_preprocess_fn) # TODO: Move embedding *into* Keras model. 
#!/bin/bash
# Fine-tunes a warm-started tf_cnn model on every support episode and
# evaluates it on the matching query episode.
#
# Usage: finetune.sh [test]
#   With "test" the test episodes are used with a single hyperparameter
#   setting; otherwise the validation episodes are swept over a grid of
#   learning rates and train steps.
#
# For each (learning rate, train steps) pair the per-community results are
# appended to one local CSV, which is then copied to GCS.

BASE_PATH="gs://conversationai-models"
GCS_RESOURCES="${BASE_PATH}/resources"

warm_start_from="gs://conversationai-models/tf_trainer_runs/msushkov/tf_cnn_many_communities_40_per_8_shot_glove/20190723_110543/model_dir"

eval_steps=1
eval_period=5
labels="label"
label_dtypes="int"
text_feature="text"

batch_size=24
dense_units="64,64"
filter_sizes="3,4,5"
num_filters=128
dropout_rate=0.33976339995062715
pooling_type="max"

if [ "$1" == "test" ]; then
  VALIDATION_OR_TEST="test"

  # Best hparams found on the validation set
  learning_rate_lst=(0.00035725183171118115)
  train_steps_lst=(5)
else
  VALIDATION_OR_TEST="validation"

  # original, original/2, original/5, original/10, original*2
  learning_rate_lst=(0.00035725183171118115 0.00017862591 0.00007145036 0.000035725183171118115 0.00071450366)
  train_steps_lst=(5 10 50)
fi

combined_results_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/$VALIDATION_OR_TEST"
train_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/${VALIDATION_OR_TEST}_episodes/support/*.tfrecord"

for learning_rate in "${learning_rate_lst[@]}"; do
  echo "Learning rate: $learning_rate"
  for train_steps in "${train_steps_lst[@]}"; do
    echo "Train steps: $train_steps"

    tmp_results_fname="tf_cnn_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv"
    tmp_results_path="/tmp/$tmp_results_fname"

    # The Python job appends to this file, so start from a clean slate.
    # -f keeps the first run (no file yet) from printing an error.
    rm -f "$tmp_results_path"

    COUNTER=0
    # The wildcard is quoted so that gcloud, not the local shell, expands it.
    for train_path in $(gcloud storage ls "$train_dir"); do
      echo "Community $COUNTER out of 170..."

      # The query episode lives next to the support episode.
      valid_path=${train_path/${VALIDATION_OR_TEST}_episodes\/support/${VALIDATION_OR_TEST}_episodes\/query}

      # Each community is fine-tuned from the warm-start checkpoint, not from
      # the previous community's model.
      rm -rf "tf_cnn_local_model_dir"

      python -m tf_trainer.tf_cnn.finetune \
        --model_dir="tf_cnn_local_model_dir" \
        --train_path="$train_path" \
        --validate_path="$valid_path" \
        --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
        --is_embedding_trainable=False \
        --train_steps="$train_steps" \
        --eval_period="$eval_period" \
        --eval_steps="$eval_steps" \
        --labels="$labels" \
        --label_dtypes="$label_dtypes" \
        --preprocess_in_tf=False \
        --batch_size="$batch_size" \
        --dense_units="$dense_units" \
        --filter_sizes="$filter_sizes" \
        --num_filters="$num_filters" \
        --dropout_rate="$dropout_rate" \
        --learning_rate="$learning_rate" \
        --pooling_type="$pooling_type" \
        --text_feature="$text_feature" \
        --warm_start_from="$warm_start_from" \
        --tmp_results_path="$tmp_results_path"

      COUNTER=$((COUNTER + 1))
    done

    gcloud storage cp "$tmp_results_path" "$combined_results_dir"
  done
done
# parameterServerCount: 1 scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/toxicity # TODO: change based on dataset maxTrials: 120 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 1 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: INTEGER minValue: 16 maxValue: 256 scaleType: UNIT_LOG_SCALE - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 32 - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: experiments/tf_trainer/tf_cnn/hparam_config_civil_comments.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/toxicity maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 64 - 128 - 256 - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: experiments/tf_trainer/tf_cnn/hparam_config_many_communities.yaml 
================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/removed maxTrials: 150 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 64 - 128 - 256 - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: experiments/tf_trainer/tf_cnn/hparam_config_many_communities_40_per_8_shot.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/label maxTrials: 150 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 32 - 64 - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: experiments/tf_trainer/tf_cnn/hparam_config_toxicity.yaml 
================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/frac_neg maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 64 - 128 - parameterName: filter_sizes type: CATEGORICAL categoricalValues: - '5,5' - '3,4,5' - parameterName: num_filters type: DISCRETE discreteValues: - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: pooling_type type: CATEGORICAL categoricalValues: - 'average' - 'max' ================================================ FILE: experiments/tf_trainer/tf_cnn/model.py ================================================ """Tensorflow Estimator CNN.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.keras import layers from tf_trainer.common import base_model from typing import Set FLAGS = tf.app.flags.FLAGS # Hyperparameters # TODO: Add validation tf.app.flags.DEFINE_float('learning_rate', 0.00003, 'The learning rate to use during training.') tf.app.flags.DEFINE_float('dropout_rate', 0.3, 'The dropout rate to use during training.') # This would normally just be a multi_integer, but we use string due to # constraints with ML Engine hyperparameter tuning. # TODO: add link to relevant public issue/bug/documentation? 
class TFCNNModel(base_model.BaseModel):
  """TF CNN Model

  TF implementation of a CNN. Inputs should be sequences of word embeddings.

  The network is a stack of same-padded 1D convolutions, one global pooling
  layer, a stack of dense layers with dropout, and a final linear layer that
  feeds one binary-classification head per target label.
  """

  def __init__(self, target_labels: Set[str]) -> None:
    """Stores the labels to predict.

    Args:
      target_labels: Label names; one binary head is created per name.
    """
    self._target_labels = target_labels

  @staticmethod
  def hparams():
    """Builds the hyperparameters from the command-line flags.

    Returns:
      A tf.contrib.training.HParams in which the comma-delimited
      `filter_sizes` and `dense_units` flags are parsed into lists of ints.
    """
    filter_sizes = [int(size) for size in FLAGS.filter_sizes.split(',')]
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    return tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        dropout_rate=FLAGS.dropout_rate,
        filter_sizes=filter_sizes,
        num_filters=FLAGS.num_filters,
        dense_units=dense_units,
        embedding_size=FLAGS.embedding_size,
        pooling_type=FLAGS.pooling_type)

  def estimator(self, model_dir):
    """Returns a tf.estimator.Estimator that stores its state in model_dir."""
    return tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model_fn: builds the CNN graph and its multi-head spec.

    Args:
      features: Dict of input tensors; the tensor stored under
        base_model.TOKENS_FEATURE_KEY is the network input (presumably
        shaped [batch, sequence, embedding] -- confirm against the
        preprocessor).
      labels: Labels, forwarded unchanged to the multi-head.
      mode: A tf.estimator.ModeKeys value.
      params: Hyperparameters as produced by `hparams()`.
      config: Estimator run configuration; unused here.

    Returns:
      The EstimatorSpec created by the multi-head.

    Raises:
      ValueError: If params.pooling_type is neither 'average' nor 'max'.
    """
    del config  # Part of the model_fn signature but not needed.
    inputs = features[base_model.TOKENS_FEATURE_KEY]

    # tf.layers.dropout is a no-op unless `training` is true, so it has to be
    # driven by the estimator mode; otherwise dropout_rate has no effect.
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Conv
    X = inputs
    for filter_size in params.filter_sizes:
      X = layers.Conv1D(
          params.num_filters,
          filter_size,
          activation='relu',
          padding='same')(
              X)

    # Collapse the sequence dimension.
    if params.pooling_type == 'average':
      X = layers.GlobalAveragePooling1D()(X)
    elif params.pooling_type == 'max':
      X = layers.GlobalMaxPooling1D()(X)
    else:
      raise ValueError('Unrecognized pooling type parameter')

    # FC
    logits = X
    for num_units in params.dense_units:
      logits = tf.layers.dense(
          inputs=logits, units=num_units, activation=tf.nn.relu)
      logits = tf.layers.dropout(
          logits, rate=params.dropout_rate, training=is_training)

    # One logit per target label.
    logits = tf.layers.dense(
        inputs=logits, units=len(self._target_labels), activation=None)

    output_heads = [
        tf.contrib.estimator.binary_classification_head(name=name)
        for name in self._target_labels
    ]
    multihead = tf.contrib.estimator.multi_head(output_heads)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return multihead.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)
#!/bin/bash
# Submits a tf_cnn training job to ML Engine with per-dataset hyperparameters.
#
# Usage: run.ml_engine.sh <dataset> [optimistic|pessimistic]
#   <dataset> is one of civil_comments, toxicity, many_communities,
#   many_communities_40_per_8_shot. The second argument is required only for
#   many_communities_40_per_8_shot.

source "tf_trainer/common/dataset_config.sh"
DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_cnn"
MODEL_NAME_DATA="${MODEL_NAME}_${1}_glove"
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

if [ "$1" == "civil_comments" ]; then
  batch_size=128
  dense_units='128,128'
  filter_sizes='3,4,5'
  num_filters=128
  dropout_rate=0.01527361736403272
  learning_rate=0.0001932910006772403
  pooling_type='average'
  train_steps=50000
  eval_period=1000
  eval_steps=2000

elif [ "$1" == "toxicity" ]; then
  batch_size=128
  dense_units='64'
  filter_sizes='3,4,5'
  num_filters=128
  dropout_rate=0.59761635967002524
  learning_rate=0.00028233147441192243
  pooling_type='max'
  train_steps=55000
  eval_period=1000
  eval_steps=1500

elif [ "$1" == "many_communities" ]; then
  batch_size=128
  dense_units='128,128'
  filter_sizes='3,4,5'
  num_filters=128
  dropout_rate=0.42090135248508892
  learning_rate=8.8262915612024245e-05
  pooling_type='average'
  train_steps=700000
  eval_period=4000
  eval_steps=45000

elif [ "$1" == "many_communities_40_per_8_shot" ]; then
  train_steps=8000
  eval_steps=250
  eval_period=200

  if [ "$2" == "optimistic" ]; then
    batch_size=64
    dense_units='64'
    filter_sizes='3,4,5'
    num_filters=128
    dropout_rate=0.50444323963758519
    learning_rate=0.00016448334200861331
    pooling_type='max'

  elif [ "$2" == "pessimistic" ]; then
    batch_size=32
    dense_units='64,64'
    filter_sizes='3,4,5'
    num_filters=128
    dropout_rate=0.33976339995062715
    learning_rate=0.00035725183171118115
    pooling_type='max'

  else
    echo "Must provide second positional argument."
    exit 1
  fi

else
  echo "First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot."
  # `return` is only valid in a function or a sourced script; in an executed
  # script it fails and execution would fall through to the job submission
  # below with no hyperparameters set.
  exit 1
fi

gcloud ml-engine jobs submit training "tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME}" \
  --job-dir="${JOB_DIR}" \
  --runtime-version=1.10 \
  --scale-tier 'BASIC_GPU' \
  --module-name="tf_trainer.${MODEL_NAME}.run" \
  --package-path=tf_trainer \
  --python-version "3.5" \
  --region=us-east1 \
  --verbosity=debug \
  -- \
  --train_path="$train_path" \
  --validate_path="$valid_path" \
  --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \
  --model_dir="${JOB_DIR}/model_dir" \
  --is_embedding_trainable=False \
  --train_steps="$train_steps" \
  --eval_period="$eval_period" \
  --eval_steps="$eval_steps" \
  --labels="$labels" \
  --label_dtypes="$label_dtypes" \
  --preprocess_in_tf=False \
  --batch_size="$batch_size" \
  --dense_units="$dense_units" \
  --filter_sizes="$filter_sizes" \
  --num_filters="$num_filters" \
  --dropout_rate="$dropout_rate" \
  --learning_rate="$learning_rate" \
  --pooling_type="$pooling_type" \
  --text_feature="$text_feature"

echo "Model dir:"
echo "${JOB_DIR}/model_dir"
preprocessor.train_preprocess_fn(nltk.word_tokenize) dataset = tfrecord_input.TFRecordInputWithTokenizer( train_preprocess_fn=train_preprocess_fn) # TODO: Move embedding *into* Keras model. model_tf = tf_cnn.TFCNNModel(dataset.labels()) model = preprocessor.add_embedding_to_model(model_tf, base_model.TOKENS_FEATURE_KEY) trainer = model_trainer.ModelTrainer(dataset, model) trainer.train_with_eval() serving_input_fn = serving_input.create_serving_input_fn( word_to_idx=preprocessor._word_to_idx, unknown_token=preprocessor._unknown_token, text_feature_name=base_model.TOKENS_FEATURE_KEY, example_key_name=base_model.EXAMPLE_KEY) trainer.export(serving_input_fn, base_model.EXAMPLE_KEY, metrics_key="auc/%s" % FLAGS.labels.split(',')[0]) if __name__ == "__main__": tf.logging.set_verbosity(tf.logging.INFO) tf.app.run(main) ================================================ FILE: experiments/tf_trainer/tf_gru_attention/__init__.py ================================================ ================================================ FILE: experiments/tf_trainer/tf_gru_attention/finetune.py ================================================ """Experiments with many_communities dataset.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import nltk import os import pandas as pd import tensorflow as tf from tf_trainer.common import base_model from tf_trainer.common import model_trainer from tf_trainer.common import serving_input from tf_trainer.common import text_preprocessor from tf_trainer.common import tfrecord_input from tf_trainer.common import types from tf_trainer.tf_gru_attention import model as tf_gru_attention from tensorflow.python.lib.io import file_io FLAGS = tf.app.flags.FLAGS tf.app.flags.DEFINE_string("embeddings_path", "local_data/glove.6B/glove.6B.100d.txt", "Path to the embeddings file.") tf.app.flags.DEFINE_string("tmp_results_path", None, "Path to the local combined (across communities) results file.") 
def main(argv):
  """Fine-tunes a warm-started GRU-attention model and logs dev predictions.

  Trains on --train_path starting from --warm_start_from, predicts on the dev
  set, pairs each prediction with the label read from the CSV that sits next
  to --validate_path, and appends the rows (without a header) to
  --tmp_results_path.

  Args:
    argv: Positional command-line arguments; ignored.

  Raises:
    ValueError: If the number of labels in the CSV differs from the number of
      predictions.
  """
  del argv  # unused

  embeddings_path = FLAGS.embeddings_path

  preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)

  nltk.download("punkt")
  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)
  dataset = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=train_preprocess_fn)

  # TODO: Move embedding *into* Keras model.
  model_tf = tf_gru_attention.TFRNNModel(dataset.labels())
  model = preprocessor.add_embedding_to_model(model_tf,
                                              base_model.TOKENS_FEATURE_KEY)

  trainer = model_trainer.ModelTrainer(dataset, model,
                                       warm_start_from=FLAGS.warm_start_from)
  trainer.train_with_eval()

  keys = [("label", "probabilities")]
  predictions = list(trainer.predict_on_dev(predict_keys=keys))

  # The labels come from a CSV whose name is derived from the tfrecord path;
  # the community name is the part of the file name before the first "..".
  valid_path_csv = FLAGS.validate_path.replace("..tfrecord", ".csv")
  df = pd.read_csv(valid_path_csv)
  labels = df["label"].values
  community = os.path.basename(FLAGS.validate_path).split("..")[0]

  # An explicit check rather than `assert`, which is stripped under -O and
  # would let misaligned rows be written silently.
  if len(labels) != len(predictions):
    raise ValueError(
        "Labels and predictions must have the same length "
        "(got %d labels and %d predictions)." %
        (len(labels), len(predictions)))

  # NOTE(review): rows are appended without a header, and the column order
  # of a dict-built DataFrame depends on the Python/pandas version -- confirm
  # the order expected by whatever reads the combined file.
  d = {
      "label": labels,
      # Index 1 of each entry is taken as the prediction score (presumably
      # the positive-class probability -- confirm against the head's output).
      "prediction": [p[keys[0]][1] for p in predictions],
      "community": [community] * len(predictions),
  }

  df = pd.DataFrame(data=d)
  df.to_csv(path_or_buf=FLAGS.tmp_results_path, mode='a+', index=False,
            header=False)
# Hyperparameter tuning configuration for the tf_gru_attention model.
trainingInput:
  pythonVersion: '3.5'
  scaleTier: CUSTOM
  masterType: standard
  workerType: standard_gpu
  parameterServerType: large_model
  workerCount: 1
  parameterServerCount: 1
  hyperparameters:
    goal: MAXIMIZE
    hyperparameterMetricTag: auc/frac_neg
    maxTrials: 40
    maxParallelTrials: 4
    enableTrialEarlyStopping: FALSE
    params:
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.000001
        maxValue: 0.01
        scaleType: UNIT_LOG_SCALE
      - parameterName: dropout_rate
        type: DOUBLE
        minValue: 0
        maxValue: 1
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: batch_size
        type: INTEGER
        minValue: 16
        maxValue: 128
        scaleType: UNIT_LOG_SCALE
      # Comma-delimited strings: one entry per stacked layer.
      - parameterName: gru_units
        type: CATEGORICAL
        categoricalValues:
          - '256'
          - '128'
          - '128,128'
          - '64'
          - '64,64'
      - parameterName: attention_units
        type: DISCRETE
        discreteValues:
          - 32
          - 64
          # Was 124; the per-dataset configs for this model all use 128, so
          # this looks like a typo in the power-of-two series.
          - 128
          - 256
      - parameterName: dense_units
        type: CATEGORICAL
        categoricalValues:
          - '128'
          - '128,128'
          - '64'
          - '64,64'
type: DISCRETE discreteValues: - 32 - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' ================================================ FILE: experiments/tf_trainer/tf_gru_attention/hparam_config_many_communities_40_per_8_shot.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/label maxTrials: 200 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 32 - 64 - parameterName: gru_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: attention_units type: DISCRETE discreteValues: - 32 - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' ================================================ FILE: experiments/tf_trainer/tf_gru_attention/hparam_config_toxicity.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/frac_neg maxTrials: 200 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 16 - 32 - 64 - parameterName: gru_units type: CATEGORICAL categoricalValues: - '128' - '128,128' - '64' - '64,64' - parameterName: attention_units type: DISCRETE discreteValues: - 32 - 64 - 128 - parameterName: dense_units type: CATEGORICAL categoricalValues: 
# Hyperparameters
# TODO: Add validation
tf.app.flags.DEFINE_float('learning_rate', 0.00003,
                          'The learning rate to use during training.')
tf.app.flags.DEFINE_float('dropout_rate', 0.3,
                          'The dropout rate to use during training.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
tf.app.flags.DEFINE_string(
    'gru_units', '128',
    'Comma delimited string for the number of hidden units in the gru layer.')
# The help text used to repeat the gru_units wording; this flag is passed to
# the attention layer (`attention_size`), not to the GRU cells.
tf.app.flags.DEFINE_integer(
    'attention_units', 64,
    'The number of hidden units in the attention layer.')
# This would normally just be a multi_integer, but we use string due to
# constraints with ML Engine hyperparameter tuning.
tf.app.flags.DEFINE_string(
    'dense_units', '128',
    'Comma delimited string for the number of hidden units in the dense layer.')


def attend(inputs, attention_size, attention_depth=1):
    """Attention layer: softmax-weighted sum of `inputs` over axis 1.

    Each position along axis 1 is scored by a small dense network that ends
    in a single linear unit; the scores are normalised with a softmax over
    axis 1 and used to weight-sum `inputs`.

    Args:
      inputs: Tensor indexed on three axes; axis 2 must have a static size.
        Presumably (batch, time, features) -- confirm against callers.
      attention_size: Width of each hidden ReLU layer of the scoring network.
        Only used when `attention_depth` > 1.
      attention_depth: Total number of dense layers in the scoring network.
        With the default of 1 there is no hidden layer, so `attention_size`
        has no effect.

    Returns:
      A tuple `(output, alphas)`: `output` is `inputs * alphas` summed over
      axis 1, and `alphas` are the softmax weights.
    """
    sequence_length = tf.shape(inputs)[1]  # dynamic
    final_layer_size = inputs.shape[2]  # static

    # Fold axes 0 and 1 together so one dense stack scores every position.
    x = tf.reshape(inputs, [-1, final_layer_size])
    for _ in range(attention_depth - 1):
        x = tf.layers.dense(x, attention_size, activation=tf.nn.relu)
    x = tf.layers.dense(x, 1, activation=None)

    logits = tf.reshape(x, [-1, sequence_length, 1])
    alphas = tf.nn.softmax(logits, dim=1)
    output = tf.reduce_sum(inputs * alphas, 1)
    return output, alphas


class TFRNNModel(base_model.BaseModel):
    """Estimator-based GRU model with attention and one binary head per label."""

    def __init__(self, target_labels: Set[str]) -> None:
        """Stores the label names; one classification head is built per name."""
        self._target_labels = target_labels

    @staticmethod
    def hparams():
        """Builds an HParams object from the command-line flags.

        `gru_units` and `dense_units` arrive as comma-delimited strings and
        are parsed into lists of ints (one entry per layer).
        """
        gru_units = [int(units) for units in FLAGS.gru_units.split(',')]
        dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
        hparams = tf.contrib.training.HParams(
            learning_rate=FLAGS.learning_rate,
            dropout_rate=FLAGS.dropout_rate,
            gru_units=gru_units,
            attention_units=FLAGS.attention_units,
            dense_units=dense_units)
        return hparams

    def estimator(self, model_dir):
        """Returns a tf.estimator.Estimator that checkpoints to `model_dir`."""
        estimator = tf.estimator.Estimator(
            model_fn=self._model_fn,
            params=self.hparams(),
            config=tf.estimator.RunConfig(model_dir=model_dir))
        return estimator

    def _model_fn(self, features, labels, mode, params, config):
        """Estimator model_fn: stacked GRU -> attention -> dense -> multi-head.

        Args:
          features: Dict of input tensors; reads
            `base_model.TOKENS_FEATURE_KEY`. The value is fed to dynamic_rnn
            as float32, so it is presumably already embedded -- confirm.
          labels: Labels forwarded to the multi-head.
          mode: A `tf.estimator.ModeKeys` value.
          params: HParams produced by `hparams()`.
          config: RunConfig supplied by the Estimator (unused here).

        Returns:
          The EstimatorSpec created by the multi-head.
        """
        inputs = features[base_model.TOKENS_FEATURE_KEY]

        rnn_layers = [
            tf.nn.rnn_cell.GRUCell(num_units=size, activation=tf.nn.tanh)
            for size in params.gru_units
        ]

        # create a RNN cell composed sequentially of a number of RNNCells
        multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

        # TODO: make bidirectional
        outputs, _ = tf.nn.dynamic_rnn(
            multi_rnn_cell, inputs, dtype=tf.float32)

        # TODO: Handle sequence length in the attention layer (via a mask).
        # Padded elements should not be part of the average.
        logits, _ = attend(
            inputs=outputs, attention_size=params.attention_units)

        # tf.layers.dropout defaults to training=False, which makes it an
        # identity op. Tie it to the Estimator mode so dropout_rate actually
        # takes effect during training and is disabled for eval/predict.
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        for num_units in params.dense_units:
            logits = tf.layers.dense(
                inputs=logits, units=num_units, activation=tf.nn.relu)
            logits = tf.layers.dropout(
                logits, rate=params.dropout_rate, training=is_training)
        logits = tf.layers.dense(
            inputs=logits, units=len(self._target_labels), activation=None)

        output_heads = [
            tf.contrib.estimator.binary_classification_head(name=name)
            for name in self._target_labels
        ]
        multihead = tf.contrib.estimator.multi_head(output_heads)

        optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        return multihead.create_estimator_spec(
            features=features,
            labels=labels,
            mode=mode,
            logits=logits,
            optimizer=optimizer)
MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S") gcloud ml-engine versions create $MODEL_VERSION \ --model $MODEL_NAME \ --origin $MODEL_SAVED_PATH \ --runtime-version 1.10 ================================================ FILE: experiments/tf_trainer/tf_gru_attention/run.hyperparameter.sh ================================================ #!/bin/bash source "tf_trainer/common/dataset_config.sh" DATETIME=$(date '+%Y%m%d_%H%M%S') MODEL_NAME="tf_gru_attention" MODEL_NAME_DATA="${MODEL_NAME}_$1_glove" JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}" gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \ --job-dir=${JOB_DIR} \ --runtime-version=1.12 \ --module-name="tf_trainer.${MODEL_NAME}.run" \ --package-path=tf_trainer \ --region=us-east1 \ --verbosity=debug \ --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \ -- \ --train_path=$train_path \ --validate_path=$valid_path \ --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \ --embedding_size=300 \ --model_dir="${JOB_DIR}/model_dir" \ --is_embedding_trainable=False \ --train_steps=$train_steps \ --eval_period=$eval_period \ --eval_steps=$eval_steps \ --labels=$labels \ --label_dtypes=$label_dtypes \ --preprocess_in_tf=False \ --text_feature=$text_feature echo "Model dir:" echo ${JOB_DIR}/model_dir ================================================ FILE: experiments/tf_trainer/tf_gru_attention/run.local.sh ================================================ #!/bin/bash # Note: # We currently use 2 different embeddings: # - glove.6B/glove.6B.300d.txt # - google-news/GoogleNews-vectors-negative300.txt # Glove assumes all words are lowercased, while Google-news handles different casing. # As there is currently no tf operation that perform lowercasing, we have the following # requirements: # - For google news: Run preprocess_in_tf=True (no lowercasing). # - For glove.6B, Run preprocess_in_tf=False (will force lowercasing). 
#!/bin/bash
# This script runs one training job on Cloud MLE.
#
# Usage: run.ml_engine.sh <dataset> [optimistic|pessimistic]
#   <dataset> is one of civil_comments, toxicity, many_communities,
#   many_communities_40_per_8_shot. The second argument is required (and
#   only read) for many_communities_40_per_8_shot.

# Note:
# We currently use 2 different embeddings:
# - glove.6B/glove.6B.300d.txt
# - google-news/GoogleNews-vectors-negative300.txt
# Glove assumes all words are lowercased, while Google-news handles different casing.
# As there is currently no tf operation that performs lowercasing, we have the following
# requirements:
# - For google news: Run preprocess_in_tf=True (no lowercasing).
# - For glove.6B, Run preprocess_in_tf=False (will force lowercasing).

# train_path, valid_path, labels, label_dtypes, GCS_RESOURCES and
# MODEL_PARENT_DIR are not set in this script; presumably they come from
# this sourced file -- confirm in dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"

DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_gru_attention"
MODEL_NAME_DATA=${MODEL_NAME}_$1_glove
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

# Per-dataset hyperparameters and machine configuration.
if [ "$1" == "civil_comments" ]; then
  batch_size=128
  attention_units=32
  dropout_rate=0.60960359286224075
  learning_rate=0.0010256671195808884
  dense_units='128'
  gru_units='128,128'
  train_steps=50000
  eval_period=1000
  eval_steps=2000
  config="tf_trainer/common/basic_gpu_config.yaml"
  text_feature="comment_text"
elif [ "$1" == "toxicity" ]; then
  batch_size=32
  attention_units=32
  dropout_rate=0.69999994803861521
  learning_rate=0.00030340058446715442
  dense_units='128'
  gru_units='128,128'
  train_steps=250000
  eval_period=1000
  eval_steps=6000
  config="tf_trainer/common/basic_gpu_config.yaml"
  text_feature="comment_text"
elif [ "$1" == "many_communities" ]; then
  batch_size=128
  attention_units=32
  dropout_rate=0.38471142580880757
  learning_rate=0.000755324856537066
  dense_units='128'
  gru_units='128'
  train_steps=700000
  eval_period=4000
  eval_steps=45000
  config="tf_trainer/common/p100_config.yaml"
  text_feature="comment_text"
elif [ "$1" == "many_communities_40_per_8_shot" ]; then
  # NOTE: text_feature is not set in this branch; whatever value is already
  # in scope after sourcing dataset_config.sh is passed through.
  train_steps=8000
  eval_steps=250
  eval_period=200
  config="tf_trainer/common/basic_gpu_config.yaml"
  if [ "$2" == "optimistic" ]; then
    batch_size=64
    attention_units=32
    dropout_rate=0.69778643162683085
    learning_rate=0.00080291321858594659
    dense_units='128,128'
    gru_units='128'
  elif [ "$2" == "pessimistic" ]; then
    batch_size=32
    attention_units=64
    dropout_rate=0.052541994248873507
    learning_rate=0.00049418814574477758
    dense_units='128,128'
    gru_units='128'
  else
    echo "Must provide second positional argument."
    exit 1
  fi
else
  echo "First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot."
  # `return` is only valid in a function or a sourced script; when this file
  # is executed it errors and execution falls through to the gcloud submit
  # below with every hyperparameter empty. Exit instead, as the branch above
  # and run.deploy.sh already do.
  exit 1
fi

# Everything after the bare `--` is forwarded to the trainer module itself.
gcloud ai-platform jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
  --job-dir=${JOB_DIR} \
  --runtime-version=1.10 \
  --config $config \
  --module-name="tf_trainer.${MODEL_NAME}.run" \
  --package-path=tf_trainer \
  --region=us-east1 \
  --verbosity=debug \
  -- \
  --train_path=$train_path \
  --validate_path=$valid_path \
  --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt" \
  --model_dir="${JOB_DIR}/model_dir" \
  --labels=$labels \
  --label_dtypes=$label_dtypes \
  --preprocess_in_tf=False \
  --batch_size=$batch_size \
  --attention_units=$attention_units \
  --dropout_rate=$dropout_rate \
  --learning_rate=$learning_rate \
  --dense_units=$dense_units \
  --gru_units=$gru_units \
  --train_steps=$train_steps \
  --eval_period=$eval_period \
  --eval_steps=$eval_steps \
  --text_feature=$text_feature
def main(argv):
    """Fine-tunes the TF Hub classifier and appends dev predictions to a CSV.

    Trains from the `--warm_start_from` checkpoint, predicts on the
    validation set, pairs each prediction with the label read from the
    sibling CSV of `--validate_path`, and appends
    (label, prediction, community) rows to `--tmp_results_path`.

    Raises:
      ValueError: If the number of predictions differs from the number of
        labels in the CSV.
    """
    del argv  # unused

    dataset = tfrecord_input.TFRecordInput()
    model = tf_hub_classifier.TFHubClassifierModel(dataset.labels())

    trainer = model_trainer.ModelTrainer(
        dataset, model, warm_start_from=FLAGS.warm_start_from)
    trainer.train_with_eval()

    # Each prediction is indexed below by this (head name, output key) tuple.
    keys = [("label", "probabilities")]
    predictions = list(trainer.predict_on_dev(predict_keys=keys))

    # Validation files are presumably named "<community>..tfrecord" with a
    # matching "<community>.csv" holding the labels -- confirm against the
    # data layout. The double dot is relied on for both derivations below.
    valid_path_csv = FLAGS.validate_path.replace("..tfrecord", ".csv")
    df = pd.read_csv(valid_path_csv)
    labels = df["label"].values
    community = os.path.basename(FLAGS.validate_path).split("..")[0]

    # Explicit check rather than `assert`: asserts are stripped under
    # `python -O`, which would let misaligned rows be written silently.
    if len(labels) != len(predictions):
        raise ValueError(
            "Labels and predictions must have the same length. "
            "Got %d labels and %d predictions."
            % (len(labels), len(predictions)))

    d = {
        "label": labels,
        # Index 1 of the probabilities vector (presumably the positive
        # class of the binary head).
        "prediction": [p[keys[0]][1] for p in predictions],
        "community": [community] * len(predictions),
    }
    df = pd.DataFrame(data=d)
    # Append without a header so results from many runs accumulate in one
    # file.
    df.to_csv(path_or_buf=FLAGS.tmp_results_path, mode='a+', index=False,
              header=False)
combined_results_dir="gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/$VALIDATION_OR_TEST" for learning_rate in "${learning_rate_lst[@]}"; do echo "Learning rate:" echo $learning_rate for train_steps in "${train_steps_lst[@]}"; do echo "Train steps:" echo $train_steps tmp_results_fname="tf_hub_classifier_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv" tmp_results_path="/tmp/$tmp_results_fname" rm $tmp_results_path COUNTER=0 for train_path in `gcloud storage ls $train_dir`; do valid_path=${train_path/${VALIDATION_OR_TEST}_episodes\/support/${VALIDATION_OR_TEST}_episodes\/query} rm -rf "tf_hub_classifier_local_model_dir" python -m tf_trainer.tf_hub_classifier.finetune \ --model_dir="tf_hub_classifier_local_model_dir" \ --train_path=$train_path \ --validate_path=$valid_path \ --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \ --is_embedding_trainable=False \ --train_steps=$train_steps \ --eval_period=$eval_period \ --eval_steps=$eval_steps \ --labels=$labels \ --label_dtypes=$label_dtypes \ --preprocess_in_tf=False \ --batch_size=$batch_size \ --dense_units=$dense_units \ --dropout_rate=$dropout_rate \ --learning_rate=$learning_rate \ --text_feature=$text_feature \ --warm_start_from=$warm_start_from \ --tmp_results_path=$tmp_results_path COUNTER=$[$COUNTER +1] done gcloud storage cp $tmp_results_path $combined_results_dir done done ================================================ FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: CUSTOM masterType: standard workerType: standard_gpu parameterServerType: large_model workerCount: 1 parameterServerCount: 1 hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/frac_neg maxTrials: 40 maxParallelTrials: 4 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE 
minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 1 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: INTEGER minValue: 16 maxValue: 256 scaleType: UNIT_LOG_SCALE - parameterName: dense_units type: CATEGORICAL categoricalValues: - '512,128,64' - '128,64,64' - '128,64' - '512,64' - '128,128,128,64' ================================================ FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_civil_comments.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/toxicity maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 16 - 32 - 64 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '512,128,64' - '128,64,64' - '128,64' - '512,64' - '128,128,128,64' ================================================ FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_many_communities.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/removed maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 16 - 32 - 64 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '512,128,64' - '128,64,64' - '128,64' - '512,64' - '128,128,128,64' 
================================================ FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_many_communities_40_per_8_shot.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/label maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 32 - 64 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '256,128,64' - '128,64,64' - '128,64' - '512,64' ================================================ FILE: experiments/tf_trainer/tf_hub_classifier/hparam_config_toxicity.yaml ================================================ trainingInput: pythonVersion: '3.5' scaleTier: BASIC_GPU hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc/frac_neg maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: FALSE params: - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.01 scaleType: UNIT_LOG_SCALE - parameterName: dropout_rate type: DOUBLE minValue: 0 maxValue: 0.7 scaleType: UNIT_LINEAR_SCALE - parameterName: batch_size type: DISCRETE discreteValues: - 16 - 32 - 64 - parameterName: dense_units type: CATEGORICAL categoricalValues: - '512,128,64' - '128,64,64' - '128,64' - '512,64' - '128,128,128,64' ================================================ FILE: experiments/tf_trainer/tf_hub_classifier/model.py ================================================ """Tensorflow Estimator using TF Hub universal sentence encoder.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import tensorflow_hub as hub from tf_trainer.common import base_model 
class TFHubClassifierModel(base_model.BaseModel):
    """Estimator-based classifier on top of a TF Hub text embedding column."""

    def __init__(self, target_labels: List[str]) -> None:
        """Stores the label names; one classification head is built per name."""
        self._target_labels = target_labels

    @staticmethod
    def hparams():
        """Builds an HParams object from the command-line flags.

        `dense_units` arrives as a comma-delimited string and is parsed into
        a list of ints, one entry per hidden dense layer.
        """
        dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
        hparams = tf.contrib.training.HParams(
            learning_rate=FLAGS.learning_rate,
            dropout_rate=FLAGS.dropout_rate,
            dense_units=dense_units)
        return hparams

    def estimator(self, model_dir):
        """Returns a tf.estimator.Estimator that checkpoints to `model_dir`."""
        estimator = tf.estimator.Estimator(
            model_fn=self._model_fn,
            params=self.hparams(),
            config=tf.estimator.RunConfig(model_dir=model_dir))
        return estimator

    def _model_fn(self, features, labels, mode, params, config):
        """Estimator model_fn: hub text embedding -> dense stack -> multi-head.

        Args:
          features: Dict of input tensors; the text is read from
            `base_model.TEXT_FEATURE_KEY`, and each head reads its example
            weights from the `<label>_weight` feature.
          labels: Labels forwarded to the multi-head.
          mode: A `tf.estimator.ModeKeys` value.
          params: HParams produced by `hparams()`.
          config: RunConfig supplied by the Estimator (unused here).

        Returns:
          The EstimatorSpec created by the multi-head.
        """
        embedded_text_feature_column = hub.text_embedding_column(
            key=base_model.TEXT_FEATURE_KEY,
            module_spec=FLAGS.model_spec,
            trainable=FLAGS.trainable)
        inputs = tf.feature_column.input_layer(
            features, [embedded_text_feature_column])

        # tf.layers.dropout defaults to training=False, which makes it an
        # identity op. Tie it to the Estimator mode so dropout_rate actually
        # takes effect during training and is disabled for eval/predict.
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        logits = inputs
        for num_units in params.dense_units:
            logits = tf.layers.dense(
                inputs=logits, units=num_units, activation=tf.nn.relu)
            logits = tf.layers.dropout(
                logits, rate=params.dropout_rate, training=is_training)
        logits = tf.layers.dense(
            inputs=logits, units=len(self._target_labels), activation=None)

        output_heads = [
            tf.contrib.estimator.binary_classification_head(
                name=name, weight_column=name + '_weight')
            for name in self._target_labels
        ]
        multihead = tf.contrib.estimator.multi_head(output_heads)

        optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        return multihead.create_estimator_spec(
            features=features,
            labels=labels,
            mode=mode,
            logits=logits,
            optimizer=optimizer)
MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S") gcloud ml-engine versions create $MODEL_VERSION \ --model $MODEL_NAME \ --origin $MODEL_SAVED_PATH \ --runtime-version 1.10 ================================================ FILE: experiments/tf_trainer/tf_hub_classifier/run.hyperparameter.sh ================================================ #!/bin/bash source "tf_trainer/common/dataset_config.sh" DATETIME=$(date '+%Y%m%d_%H%M%S') MODEL_NAME="tf_hub_classifier" MODEL_NAME_DATA="${MODEL_NAME}_$1" JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}" gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \ --job-dir=${JOB_DIR} \ --runtime-version=1.12 \ --module-name="tf_trainer.${MODEL_NAME}.run" \ --package-path=tf_trainer \ --region=us-east1 \ --verbosity=debug \ --config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \ -- \ --train_path=$train_path \ --validate_path=$valid_path \ --model_dir="${JOB_DIR}/model_dir" \ --is_embedding_trainable=False \ --train_steps=$train_steps \ --eval_period=$eval_period \ --eval_steps=$eval_steps \ --labels=$labels \ --label_dtypes=$label_dtypes \ --preprocess_in_tf=False \ --model_spec="gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73" \ --text_feature=$text_feature echo "Model dir:" echo ${JOB_DIR}/model_dir ================================================ FILE: experiments/tf_trainer/tf_hub_classifier/run.local.sh ================================================ #!/bin/bash source "tf_trainer/common/dataset_config.sh" python -m tf_trainer.tf_hub_classifier.run \ --train_path=$train_path \ --validate_path=$valid_path \ --model_dir="tf_hub_classifier_local_model_dir" \ --model_spec="gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73" \ --labels=$labels \ --label_dtypes=$label_dtypes ================================================ FILE: 
#!/bin/bash
# This script runs one training job on Cloud MLE.
#
# Usage: run.ml_engine.sh <dataset> [optimistic|pessimistic]
#   <dataset> is one of civil_comments, toxicity, many_communities,
#   many_communities_40_per_8_shot. The second argument is required (and
#   only read) for many_communities_40_per_8_shot.

# train_path, valid_path, labels, label_dtypes, text_feature and
# MODEL_PARENT_DIR are not set in this script; presumably they come from
# this sourced file -- confirm in dataset_config.sh.
source "tf_trainer/common/dataset_config.sh"

DATETIME=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME="tf_hub_classifier"
MODEL_NAME_DATA="${MODEL_NAME}_$1"
JOB_DIR="${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}"

# Per-dataset hyperparameters and machine configuration.
if [ "$1" == "civil_comments" ]; then
  batch_size=128
  dropout_rate=0.12298246947263007
  learning_rate=0.0001473127671008433
  dense_units='512,128,64'
  train_steps=50000
  eval_period=1000
  eval_steps=2000
  config="tf_trainer/common/p100_config.yaml"
elif [ "$1" == "toxicity" ]; then
  batch_size=32
  dropout_rate=0.38925458520872092
  learning_rate=0.00012916208894260696
  dense_units='512,128,64'
  train_steps=250000
  eval_period=1000
  eval_steps=6000
  config="tf_trainer/common/p100_config.yaml"
elif [ "$1" == "many_communities" ]; then
  batch_size=128
  dropout_rate=0.6987085501984901
  learning_rate=0.00031738926545884962
  dense_units='512,128,64'
  train_steps=700000
  eval_period=4000
  eval_steps=45000
  config="tf_trainer/common/basic_gpu_config.yaml"
elif [ "$1" == "many_communities_40_per_8_shot" ]; then
  train_steps=8000
  eval_steps=250
  eval_period=200
  config="tf_trainer/common/basic_gpu_config.yaml"
  if [ "$2" == "optimistic" ]; then
    batch_size=32
    dropout_rate=0.69999979814967772
    learning_rate=7.2549254796945835e-06
    dense_units='512,64'
  elif [ "$2" == "pessimistic" ]; then
    batch_size=32
    dropout_rate=0.53291173797826941
    learning_rate=6.1924912697697353e-06
    dense_units='256,128,64'
  else
    echo "Must provide second positional argument."
    exit 1
  fi
else
  echo "First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot."
  # `return` is only valid in a function or a sourced script; when this file
  # is executed it errors and execution falls through to the gcloud submit
  # below with every hyperparameter empty. Exit instead, as the branch above
  # and run.deploy.sh already do.
  exit 1
fi

# Everything after the bare `--` is forwarded to the trainer module itself.
gcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \
  --job-dir=${JOB_DIR} \
  --runtime-version=1.10 \
  --config $config \
  --module-name="tf_trainer.${MODEL_NAME}.run" \
  --package-path=tf_trainer \
  --region=us-east1 \
  --verbosity=debug \
  -- \
  --train_path=$train_path \
  --validate_path=$valid_path \
  --model_dir="${JOB_DIR}/model_dir" \
  --labels=$labels \
  --label_dtypes=$label_dtypes \
  --batch_size=$batch_size \
  --dropout_rate=$dropout_rate \
  --learning_rate=$learning_rate \
  --dense_units=$dense_units \
  --train_steps=$train_steps \
  --eval_period=$eval_period \
  --eval_steps=$eval_steps \
  --model_spec="gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73" \
  --text_feature=$text_feature
================================================ FILE: experiments/tf_trainer/tf_hub_tfjs/__init__.py ================================================ ================================================ FILE: experiments/tf_trainer/tf_hub_tfjs/model.py ================================================ """Tensorflow Estimator using TF Hub universal sentence encoder.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import tensorflow_hub as hub from tf_trainer.common import base_model from typing import List FLAGS = tf.app.flags.FLAGS # Hyperparameters # TODO: Add validation tf.app.flags.DEFINE_float('learning_rate', 0.00005, 'The learning rate to use during training.') tf.app.flags.DEFINE_float('dropout_rate', 0.38925, 'The dropout rate to use during training.') tf.app.flags.DEFINE_string( 'model_spec', 'https://tfhub.dev/google/universal-sentence-encoder-lite/2', 'The url of the TF Hub sentence encoding module to use.') tf.app.flags.DEFINE_bool('trainable', True, 'What to pass for the TF Hub trainable parameter.') # This would normally just be a multi_integer, but we use string due to # constraints with ML Engine hyperparameter tuning. The length of the list # determines the number of layers, and the size of each layer. tf.app.flags.DEFINE_string( 'dense_units', '512,128,64', 'Comma delimited string for the number of hidden units in the dense layers.' 
class TFHubClassifierModel(base_model.BaseModel):
    """Estimator-based classifier on top of a TF Hub module fed sparse inputs."""

    def __init__(self, target_labels: List[str]) -> None:
        """Stores the label names; one classification head is built per name."""
        self._target_labels = target_labels

    @staticmethod
    def hparams():
        """Builds an HParams object from the command-line flags.

        `dense_units` arrives as a comma-delimited string and is parsed into
        a list of ints, one entry per hidden dense layer.
        """
        dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
        hparams = tf.contrib.training.HParams(
            learning_rate=FLAGS.learning_rate,
            dropout_rate=FLAGS.dropout_rate,
            dense_units=dense_units)
        return hparams

    def estimator(self, model_dir):
        """Returns a tf.estimator.Estimator that checkpoints to `model_dir`."""
        estimator = tf.estimator.Estimator(
            model_fn=self._model_fn,
            params=self.hparams(),
            config=tf.estimator.RunConfig(model_dir=model_dir))
        return estimator

    def _model_fn(self, features, labels, mode, params, config):
        """Estimator model_fn: hub module -> dense stack -> multi-head.

        Args:
          features: Dict of input tensors; reads 'values', 'indices' and
            'dense_shape' (passed to the hub module as-is), and each head
            reads its example weights from the `<label>_weight` feature.
          labels: Labels forwarded to the multi-head.
          mode: A `tf.estimator.ModeKeys` value.
          params: HParams produced by `hparams()`.
          config: RunConfig supplied by the Estimator (unused here).

        Returns:
          The EstimatorSpec created by the multi-head.
        """
        # Honour the --trainable flag (default True) instead of hard-coding
        # True, so the flag declared in this module is not silently ignored.
        module = hub.Module(FLAGS.model_spec, trainable=FLAGS.trainable)
        logits = module(
            inputs=dict(
                values=features['values'],
                indices=features['indices'],
                dense_shape=features['dense_shape']))

        # tf.layers.dropout defaults to training=False, which makes it an
        # identity op. Tie it to the Estimator mode so dropout_rate actually
        # takes effect during training and is disabled for eval/predict.
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        for num_units in params.dense_units:
            logits = tf.layers.dense(
                inputs=logits, units=num_units, activation=tf.nn.relu)
            logits = tf.layers.dropout(
                logits, rate=params.dropout_rate, training=is_training)
        logits = tf.layers.dense(
            inputs=logits, units=len(self._target_labels), activation=None)

        output_heads = [
            tf.contrib.estimator.binary_classification_head(
                name=name, weight_column=name + '_weight')
            for name in self._target_labels
        ]
        multihead = tf.contrib.estimator.multi_head(output_heads)

        optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        return multihead.create_estimator_spec(
            features=features,
            labels=labels,
            mode=mode,
            logits=logits,
            optimizer=optimizer)
[ "# Bias Evaluation for TF Javascript Model\n", "\n", "Based on the [FAT* Tutorial Measuring Unintended Bias in Text Classification Models with Real Data](https://github.com/conversationai/unintended-ml-bias-analysis/blob/master/presentations/FAT_star_tutorial.md).\n", "\n", "Copyright 2019 Google LLC.\n", "SPDX-License-Identifier: Apache-2.0" ] }, { "metadata": { "id": "0Jsjp3E5rbuC", "colab_type": "code", "outputId": "e6aeceef-b28b-4c9d-aec9-c870def2219f", "colab": { "base_uri": "https://localhost:8080/", "height": 35 } }, "cell_type": "code", "source": [ "!pip3 install --quiet \"tensorflow>=1.11\"\n", "!pip3 install --quiet sentencepiece" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "\u001b[?25l\r\u001b[K 0% |▎ | 10kB 16.3MB/s eta 0:00:01\r\u001b[K 1% |▋ | 20kB 2.2MB/s eta 0:00:01\r\u001b[K 2% |█ | 30kB 3.3MB/s eta 0:00:01\r\u001b[K 3% |█▎ | 40kB 2.2MB/s eta 0:00:01\r\u001b[K 4% |█▋ | 51kB 2.7MB/s eta 0:00:01\r\u001b[K 5% |██ | 61kB 3.2MB/s eta 0:00:01\r\u001b[K 6% |██▏ | 71kB 3.7MB/s eta 0:00:01\r\u001b[K 7% |██▌ | 81kB 4.1MB/s eta 0:00:01\r\u001b[K 8% |██▉ | 92kB 4.6MB/s eta 0:00:01\r\u001b[K 9% |███▏ | 102kB 3.5MB/s eta 0:00:01\r\u001b[K 10% |███▌ | 112kB 3.6MB/s eta 0:00:01\r\u001b[K 11% |███▉ | 122kB 5.0MB/s eta 0:00:01\r\u001b[K 12% |████ | 133kB 5.0MB/s eta 0:00:01\r\u001b[K 13% |████▍ | 143kB 9.3MB/s eta 0:00:01\r\u001b[K 14% |████▊ | 153kB 9.5MB/s eta 0:00:01\r\u001b[K 15% |█████ | 163kB 9.5MB/s eta 0:00:01\r\u001b[K 16% |█████▍ | 174kB 9.3MB/s eta 0:00:01\r\u001b[K 17% |█████▊ | 184kB 9.4MB/s eta 0:00:01\r\u001b[K 18% |██████ | 194kB 9.4MB/s eta 0:00:01\r\u001b[K 19% |██████▎ | 204kB 40.6MB/s eta 0:00:01\r\u001b[K 20% |██████▋ | 215kB 10.4MB/s eta 0:00:01\r\u001b[K 21% |███████ | 225kB 10.5MB/s eta 0:00:01\r\u001b[K 22% |███████▎ | 235kB 10.2MB/s eta 0:00:01\r\u001b[K 23% |███████▋ | 245kB 10.1MB/s eta 0:00:01\r\u001b[K 24% |███████▉ | 256kB 10.1MB/s eta 0:00:01\r\u001b[K 25% |████████▏ | 266kB 9.9MB/s eta 
0:00:01\r\u001b[K 26% |████████▌ | 276kB 10.2MB/s eta 0:00:01\r\u001b[K 27% |████████▉ | 286kB 10.2MB/s eta 0:00:01\r\u001b[K 28% |█████████▏ | 296kB 10.2MB/s eta 0:00:01\r\u001b[K 29% |█████████▌ | 307kB 10.4MB/s eta 0:00:01\r\u001b[K 30% |█████████▊ | 317kB 42.5MB/s eta 0:00:01\r\u001b[K 31% |██████████ | 327kB 42.5MB/s eta 0:00:01\r\u001b[K 32% |██████████▍ | 337kB 49.1MB/s eta 0:00:01\r\u001b[K 33% |██████████▊ | 348kB 45.7MB/s eta 0:00:01\r\u001b[K 34% |███████████ | 358kB 44.8MB/s eta 0:00:01\r\u001b[K 35% |███████████▍ | 368kB 49.3MB/s eta 0:00:01\r\u001b[K 36% |███████████▋ | 378kB 47.6MB/s eta 0:00:01\r\u001b[K 37% |████████████ | 389kB 47.8MB/s eta 0:00:01\r\u001b[K 38% |████████████▎ | 399kB 12.3MB/s eta 0:00:01\r\u001b[K 39% |████████████▋ | 409kB 12.3MB/s eta 0:00:01\r\u001b[K 40% |█████████████ | 419kB 12.3MB/s eta 0:00:01\r\u001b[K 41% |█████████████▎ | 430kB 12.2MB/s eta 0:00:01\r\u001b[K 42% |█████████████▌ | 440kB 12.1MB/s eta 0:00:01\r\u001b[K 43% |█████████████▉ | 450kB 12.2MB/s eta 0:00:01\r\u001b[K 44% |██████████████▏ | 460kB 12.2MB/s eta 0:00:01\r\u001b[K 45% |██████████████▌ | 471kB 12.2MB/s eta 0:00:01\r\u001b[K 46% |██████████████▉ | 481kB 12.3MB/s eta 0:00:01\r\u001b[K 47% |███████████████▏ | 491kB 12.2MB/s eta 0:00:01\r\u001b[K 48% |███████████████▍ | 501kB 47.1MB/s eta 0:00:01\r\u001b[K 49% |███████████████▊ | 512kB 44.5MB/s eta 0:00:01\r\u001b[K 50% |████████████████ | 522kB 45.4MB/s eta 0:00:01\r\u001b[K 51% |████████████████▍ | 532kB 47.8MB/s eta 0:00:01\r\u001b[K 52% |████████████████▊ | 542kB 49.3MB/s eta 0:00:01\r\u001b[K 53% |█████████████████ | 552kB 53.0MB/s eta 0:00:01\r\u001b[K 54% |█████████████████▎ | 563kB 53.3MB/s eta 0:00:01\r\u001b[K 55% |█████████████████▋ | 573kB 51.8MB/s eta 0:00:01\r\u001b[K 56% |██████████████████ | 583kB 51.9MB/s eta 0:00:01\r\u001b[K 57% |██████████████████▎ | 593kB 53.4MB/s eta 0:00:01\r\u001b[K 58% |██████████████████▋ | 604kB 53.2MB/s eta 0:00:01\r\u001b[K 59% |███████████████████ | 614kB 
57.7MB/s eta 0:00:01\r\u001b[K 60% |███████████████████▏ | 624kB 55.6MB/s eta 0:00:01\r\u001b[K 61% |███████████████████▌ | 634kB 54.8MB/s eta 0:00:01\r\u001b[K 62% |███████████████████▉ | 645kB 53.2MB/s eta 0:00:01\r\u001b[K 63% |████████████████████▏ | 655kB 52.5MB/s eta 0:00:01\r\u001b[K 64% |████████████████████▌ | 665kB 44.3MB/s eta 0:00:01\r\u001b[K 64% |████████████████████▉ | 675kB 45.4MB/s eta 0:00:01\r\u001b[K 65% |█████████████████████▏ | 686kB 45.2MB/s eta 0:00:01\r\u001b[K 66% |█████████████████████▍ | 696kB 45.8MB/s eta 0:00:01\r\u001b[K 67% |█████████████████████▊ | 706kB 45.4MB/s eta 0:00:01\r\u001b[K 68% |██████████████████████ | 716kB 45.6MB/s eta 0:00:01\r\u001b[K 69% |██████████████████████▍ | 727kB 45.7MB/s eta 0:00:01\r\u001b[K 70% |██████████████████████▊ | 737kB 45.4MB/s eta 0:00:01\r\u001b[K 71% |███████████████████████ | 747kB 47.7MB/s eta 0:00:01\r\u001b[K 72% |███████████████████████▎ | 757kB 47.9MB/s eta 0:00:01\r\u001b[K 73% |███████████████████████▋ | 768kB 58.4MB/s eta 0:00:01\r\u001b[K 74% |████████████████████████ | 778kB 55.5MB/s eta 0:00:01\r\u001b[K 75% |████████████████████████▎ | 788kB 55.7MB/s eta 0:00:01\r\u001b[K 76% |████████████████████████▋ | 798kB 53.9MB/s eta 0:00:01\r\u001b[K 77% |█████████████████████████ | 808kB 54.4MB/s eta 0:00:01\r\u001b[K 78% |█████████████████████████▏ | 819kB 28.4MB/s eta 0:00:01\r\u001b[K 79% |█████████████████████████▌ | 829kB 28.7MB/s eta 0:00:01\r\u001b[K 80% |█████████████████████████▉ | 839kB 28.7MB/s eta 0:00:01\r\u001b[K 81% |██████████████████████████▏ | 849kB 28.3MB/s eta 0:00:01\r\u001b[K 82% |██████████████████████████▌ | 860kB 26.5MB/s eta 0:00:01\r\u001b[K 83% |██████████████████████████▉ | 870kB 26.4MB/s eta 0:00:01\r\u001b[K 84% |███████████████████████████ | 880kB 26.9MB/s eta 0:00:01\r\u001b[K 85% |███████████████████████████▍ | 890kB 27.1MB/s eta 0:00:01\r\u001b[K 86% |███████████████████████████▊ | 901kB 27.6MB/s eta 0:00:01\r\u001b[K 87% |████████████████████████████ | 
911kB 27.2MB/s eta 0:00:01\r\u001b[K 88% |████████████████████████████▍ | 921kB 49.5MB/s eta 0:00:01\r\u001b[K 89% |████████████████████████████▊ | 931kB 48.4MB/s eta 0:00:01\r\u001b[K 90% |█████████████████████████████ | 942kB 48.4MB/s eta 0:00:01\r\u001b[K 91% |█████████████████████████████▎ | 952kB 48.8MB/s eta 0:00:01\r\u001b[K 92% |█████████████████████████████▋ | 962kB 53.0MB/s eta 0:00:01\r\u001b[K 93% |██████████████████████████████ | 972kB 53.4MB/s eta 0:00:01\r\u001b[K 94% |██████████████████████████████▎ | 983kB 53.5MB/s eta 0:00:01\r\u001b[K 95% |██████████████████████████████▋ | 993kB 52.7MB/s eta 0:00:01\r\u001b[K 96% |██████████████████████████████▉ | 1.0MB 53.2MB/s eta 0:00:01\r\u001b[K 97% |███████████████████████████████▏| 1.0MB 54.1MB/s eta 0:00:01\r\u001b[K 98% |███████████████████████████████▌| 1.0MB 53.1MB/s eta 0:00:01\r\u001b[K 99% |███████████████████████████████▉| 1.0MB 54.6MB/s eta 0:00:01\r\u001b[K 100% |████████████████████████████████| 1.0MB 17.6MB/s \n", "\u001b[?25h" ], "name": "stdout" } ] }, { "metadata": { "id": "4bSQf93oVo7j", "colab_type": "code", "outputId": "191c3e9f-d902-4071-e115-720d8d2ed1a5", "colab": { "base_uri": "https://localhost:8080/", "height": 53 } }, "cell_type": "code", "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "\n", "import re\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import tensorflow as tf\n", "import sentencepiece\n", "from google.colab import auth\n", "from IPython.display import HTML, display\n", "\n", "from sklearn import metrics\n", "\n", "%matplotlib inline\n", "\n", "# autoreload makes it easier to interactively work on code in imported libraries\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", "# Set pandas display options so we can read more of the comment text.\n", "pd.set_option('max_colwidth', 300)\n", "\n", "# Seed for 
Pandas sampling, to get consistent sampling results\n", "RANDOM_STATE = 123456789" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ], "name": "stdout" } ] }, { "metadata": { "id": "12LU1AjWr-da", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "auth.authenticate_user()" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "FFFXbLiRrvtz", "colab_type": "code", "outputId": "69de8876-a0d8-4e31-816c-a3c135854faa", "colab": { "base_uri": "https://localhost:8080/", "height": 125 } }, "cell_type": "code", "source": [ "!mkdir -p tfjs_model\n", "!gcloud storage cp --recursive gs://conversationai-public/public_models/tfjs/v1/* tfjs_model" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Copying gs://conversationai-public/public_models/tfjs/v1/saved_model.pb...\n", "Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.data-00000-of-00001...\n", "/ [0 files][ 0.0 B/ 3.9 MiB] \rCopying gs://conversationai-public/public_models/tfjs/v1/variables/variables.index...\n", "Copying gs://conversationai-public/public_models/tfjs/v1/assets/universal_encoder_8k_spm.model...\n", "- [4/4 files][ 32.3 MiB/ 32.3 MiB] 100% Done \n", "Operation completed over 4 objects/32.3 MiB. 
\n" ], "name": "stdout" } ] }, { "metadata": { "id": "0bmiyJR60gDP", "colab_type": "code", "outputId": "ded1805f-f50c-4846-cafb-dfb51d79fa4d", "colab": { "base_uri": "https://localhost:8080/", "height": 35 } }, "cell_type": "code", "source": [ "test_df = pd.read_csv(\n", " 'https://raw.githubusercontent.com/conversationai/unintended-ml-bias-analysis/master/unintended_ml_bias/new_madlibber/output_data/English/intersectional_madlibs.csv')\n", "print('test data has %d rows' % len(test_df))\n" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "test data has 30240 rows\n" ], "name": "stdout" } ] }, { "metadata": { "id": "lbF4Fy-yjnaH", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "madlibs_words = pd.read_csv(\n", " 'https://raw.githubusercontent.com/conversationai/unintended-ml-bias-analysis/master/unintended_ml_bias/new_madlibber/input_data/English/words.csv')" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "rwx0ucIXj4Ba", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "identity_columns = madlibs_words[madlibs_words.type=='identity'].word.tolist()" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "mzY7oTzQlHq5", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "for term in identity_columns:\n", " test_df[term] = test_df['phrase'].apply(\n", " lambda x: bool(re.search(r'\\b{}\\b'.format(term), x,\n", " flags=re.UNICODE|re.IGNORECASE)))\n" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "6dP7ANLcl1NC", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "_8RfGq2lX2EY", "colab_type": "text" }, "cell_type": "markdown", "source": [ "## Score test set with our text classification model\n", "\n", "Using our new model, we can score the set of test comments for toxicity.\n" ] }, { "metadata": { "id": "AfC_yo0Tt5SQ", "colab_type": "code", "colab": {} }, 
"cell_type": "code", "source": [ "TOXICITY_COLUMN = 'toxicity'\n", "TEXT_COLUMN = 'phrase'" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "E0KT0565tUDp", "colab_type": "code", "outputId": "7bbd2622-ea7e-43dd-a6b9-86d1e033508a", "colab": { "base_uri": "https://localhost:8080/", "height": 289 } }, "cell_type": "code", "source": [ "predict_fn = tf.contrib.predictor.from_saved_model(\n", " 'tfjs_model', signature_def_key='predict')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "\n", "WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", "For more information, please see:\n", " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", " * https://github.com/tensorflow/addons\n", "If you depend on functionality not listed there, please file an issue.\n", "\n", "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. 
There will be a new function for importing SavedModels in Tensorflow 2.0.\n", "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use standard file APIs to check for files with this prefix.\n", "INFO:tensorflow:Restoring parameters from tfjs_model/variables/variables\n" ], "name": "stdout" } ] }, { "metadata": { "id": "ZppO68XctZPH", "colab_type": "code", "outputId": "630cbb60-9f58-4d28-a5da-45b4091f6715", "colab": { "base_uri": "https://localhost:8080/", "height": 35 } }, "cell_type": "code", "source": [ "sp = sentencepiece.SentencePieceProcessor()\n", "sp.Load('tfjs_model/assets/universal_encoder_8k_spm.model')" ], "execution_count": 0, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, "metadata": { "tags": [] }, "execution_count": 17 } ] }, { "metadata": { "id": "Q3heBWS5tdg9", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "def progress(value, max=100):\n", " return HTML(\"\"\"\n", " \n", " {value}\n", " \n", " \"\"\".format(value=value, max=max))" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "KSG_Dc7Gti-w", "colab_type": "code", "outputId": "86ab2dd0-cd14-48f4-f42a-7a7216de26ec", "colab": { "base_uri": "https://localhost:8080/", "height": 34 } }, "cell_type": "code", "source": [ "tox_scores = []\n", "nrows = test_df.shape[0]\n", "out = display(progress(0, nrows), display_id=True)\n", "for offset in range(0, nrows):\n", " out.update(progress(offset, nrows))\n", " values = sp.EncodeAsIds(test_df[TEXT_COLUMN][offset])\n", " tox_scores.append(predict_fn({\n", " 'values': values,\n", " 'indices': [(0, i) for i in range(len(values))],\n", " 'dense_shape': [1, len(values)]})['toxicity/probabilities'][0,1])" ], "execution_count": 0, "outputs": [ { "output_type": 
"display_data", "data": { "text/html": [ "\n", " \n", " 30239\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] } } ] }, { "metadata": { "id": "V0YjAtUBWa1p", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "MODEL_NAME = 'tfjs_model'\n", "test_df[MODEL_NAME] = tox_scores" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "P31u4dyyaKKE", "colab_type": "text" }, "cell_type": "markdown", "source": [ "# Evaluate the overall ROC-AUC\n", "\n", "This calculates the models performance on the entire test set using the ROC-AUC metric." ] }, { "metadata": { "id": "o6IClt8eplMn", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "SUBGROUP_AUC = 'subgroup_auc'\n", "BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC = 'background_positive_subgroup_negative_auc'\n", "BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC = 'background_negative_subgroup_positive_auc'\n", "\n", "def compute_auc(y_true, y_pred):\n", " try:\n", " return metrics.roc_auc_score(y_true, y_pred)\n", " except ValueError:\n", " return np.nan\n", "\n", "\n", "def compute_subgroup_auc(df, subgroup, label, model_name):\n", " subgroup_examples = df[df[subgroup]]\n", " return compute_auc(subgroup_examples[label], subgroup_examples[model_name])\n", "\n", "\n", "def compute_background_positive_subgroup_negative_auc(df, subgroup, label, model_name):\n", " \"\"\"Computes the AUC of the within-subgroup negative examples and the background positive examples.\"\"\"\n", " index = df[label] == 'toxic'\n", " subgroup_negative_examples = df[df[subgroup] & ~index]\n", " non_subgroup_positive_examples = df[~df[subgroup] & index]\n", " examples = subgroup_negative_examples.append(non_subgroup_positive_examples)\n", " return compute_auc(examples[label], examples[model_name])\n", "\n", "\n", "def compute_background_negative_subgroup_positive_auc(df, subgroup, label, model_name):\n", " \"\"\"Computes the AUC of the within-subgroup positive examples and the background negative 
examples.\"\"\"\n", " index = df[label] == 'toxic'\n", " subgroup_positive_examples = df[df[subgroup] & index]\n", " non_subgroup_negative_examples = df[~df[subgroup] & ~index]\n", " examples = subgroup_positive_examples.append(non_subgroup_negative_examples)\n", " return compute_auc(examples[label], examples[model_name])\n", "\n", "\n", "def compute_bias_metrics_for_model(dataset,\n", " subgroups,\n", " model,\n", " label_col,\n", " include_asegs=False):\n", " \"\"\"Computes per-subgroup metrics for all subgroups and one model.\"\"\"\n", " records = []\n", " for subgroup in subgroups:\n", " record = {\n", " 'subgroup': subgroup,\n", " 'subgroup_size': len(dataset[dataset[subgroup]])\n", " }\n", " record[SUBGROUP_AUC] = compute_subgroup_auc(\n", " dataset, subgroup, label_col, model)\n", " record[BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC] = compute_background_positive_subgroup_negative_auc(\n", " dataset, subgroup, label_col, model)\n", " record[BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC] = compute_background_negative_subgroup_positive_auc(\n", " dataset, subgroup, label_col, model)\n", " records.append(record)\n", " return pd.DataFrame(records).sort_values('subgroup_auc', ascending=True)\n", "\n", "bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns, MODEL_NAME, TOXICITY_COLUMN)" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "GS9t687KogDQ", "colab_type": "text" }, "cell_type": "markdown", "source": [ "# Plot a heatmap of bias metrics" ] }, { "metadata": { "id": "B5OxkxMqNvaB", "colab_type": "text" }, "cell_type": "markdown", "source": [ "Plot a heatmap of the bias metrics. 
Higher scores indicate better results.\n", "* Subgroup AUC measures the ability to separate toxic and non-toxic comments for this identity.\n", "* Negative cross AUC measures the ability to separate non-toxic comments for this identity from toxic comments from the background distribution.\n", "* Positive cross AUC measures the ability to separate toxic comments for this identity from non-toxic comments from the background distribution." ] }, { "metadata": { "id": "AGb1CQn2PZVX", "colab_type": "code", "outputId": "15595027-3db8-4526-a4ea-596691143f93", "colab": { "base_uri": "https://localhost:8080/", "height": 1645 } }, "cell_type": "code", "source": [ "def plot_auc_heatmap(bias_metrics_results, models):\n", " metrics_list = [SUBGROUP_AUC, BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC, BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC]\n", " df = bias_metrics_results.set_index('subgroup')\n", " columns = []\n", " vlines = [i * len(models) for i in range(len(metrics_list))]\n", " for metric in metrics_list:\n", " for model in models:\n", " columns.append(metric)\n", " num_rows = len(df)\n", " num_columns = len(columns)\n", " fig = plt.figure(figsize=(num_columns, 0.5 * num_rows))\n", " ax = sns.heatmap(df[columns], annot=True, fmt='.2', cbar=True, cmap='Reds_r',\n", " vmin=0.5, vmax=1.0)\n", " ax.xaxis.tick_top()\n", " plt.xticks(rotation=90)\n", " ax.vlines(vlines, *ax.get_ylim())\n", " return ax\n", "\n", "plot_auc_heatmap(bias_metrics_df, [MODEL_NAME])" ], "execution_count": 0, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 97 }, { "output_type": "display_data", "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAASkAAAZKCAYAAACEXpf4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xd8FWX2x/HPTSiSAiFA6EVAjnRE\nQAFZitjXsq4oq1gWxbWLXekoCoIookixK7iyq7jq7k9FRRREQKQXj0qTKklICCGEkuT3x1ySCwQS\nJLkzcz3v1yuv3Jm5IV8u5OSZ5848J5CXl4cxxnhVlNsBjDHmeKxIGWM8zYqUMcbTrEgZYzzNipQx\nxtOsSBljPM2KlDHG06xIGWM8zYqUMcbTrEgZzxGRq0Xkg5DtmSJylZuZiuLHzH5hRcp40X3ATSHb\nlwH3uxOl2PyY2ResSBkvigb2hmxHAQGXshSXHzP7Qhm3AxhTiBeAlSKyBueHvwkw1N1IRfJjZl8I\n2CoIxotEJA5oChwEVFWzXI5UJD9m9gMrUsZzROQr4Kj/mKraw4U4xeLHzH5hp3vGi+4KeVwWOAeo\n5FKW4vJjZl+wkZTxBRH5TFUvcDvHifBjZi+ykZTxHBG544hdtYIfnuXHzH5hRcp4UbWQx3lACnCx\nS1mKy4+ZfcFO94wnBd8pSwxulgcmqOr5LkYqkh8z+4FdzGk8R0QGA8uBFcD/gEXAUldDFcGPmf3C\nipTxootVtSGwWFVbAt2BHJczFcWPmX3BipTxojwRCQBlRKSCqi7GeUvfy/yY2Rds4tx40XtAf2Aa\nsExEfgP2uBupSH7M7As2cW48TUTqAVWBpaqaKyKXq+qHbuc6Hj9m9jIbSRlPU9VfgV9Ddt0LePoH\n3o+ZvczmpIzf+HH5Ez9m9gwrUsZv/Dg/4cfMnmFFyhjjaVakjN/48dTJj5k9w4qU8SQRKS8iDQo5\n9Gy4sxSXHzP7gRUp4zki0hv4AfhvcHu8iNwAoKofu5ntWPyY2S+sSBkvuhNoCyQHtx8GjlwKxWv8\nmNkXrEgZL8pR1f0UvCu2z80wxeTHzL5gRcp40VwReRuoIyKPAHOAz13OVBQ/ZvYFuy3GeE7wRt3O\nQCdgP7BAVb9zN9Xx+TGzX9htMcaL1gGf4dy0O0tVc13OUxx+zOwLdrpnvKgpzsJxvYEVIvKKiHi9\noYEfM/uCne4ZTxORJsBA4BpVPcXtPMXhx8xeZqd7xnNE5BzgMuACYAvwH+AhV0MVwY+Z/cKKlPGi\n+4H3gSdVdZfbYYrJj5l9wU73jGccWhxORO6k8JblL7kQ67j8mNlvbCRlvCQh+LlqIce8+tvUj5l9\nxYqU8QxVfTP4MEdVR4QeE5GxLkQqkh8z+42d7hnPEJErgb8BfwK+DjlUFjhDVRu4ket4/JjZb6xI\nGU8JLnXyIjAmZHcusFpVU10JVYTjZF6jqimuhIogdjGn8RRV3aCqfwZScOZ08nBals92M9exiMhZ\nqroBmAjEhnzEAx1cjBYxbE7KeI6ITMK5gvt0YCHQDnja1VDH1g1YAFxVyLE84P/CmiYC2UjKeFFz\nVe2Kc7p0Kc6IpJnLmQqlqoeKZz/gUVX9OzAKp4WVrSdVAqxIGS8qIyIVAUSkmqpuAlq7nKkoU4GO\nwfmpfwPNgTeP+xWmWKxIGS96Abgm+HmFiGwCVrsbqUjVVfU/ODcYv6CqTwKJLmeKCPbunvE0ESkL\nxKvqTrezHI+ILMC5NWYyzjzVQeALVW3nZq5IYBPnxnNEZF0h+3KAtcAAVV0c/lRFGoyzrvlIVU0R\nkUHAeJczRQQbSRnPEZHHgHTgI5x3yC4GqgFfAc+o6jkuxjsmEWkItMK5RmpJcC7NnCSbkzJedJGq\nTlTVLaq6VVVfAc5T1fluBzsWEXkY+BfQHaeofigit7ubKjLY6
Z7xomwReQ74FmdU0g4oJyLnAZmu\nJju2y4GzVDUHQETK4NwmM9HVVBHAipTxoquAG3BGJQGcuajLca7kvsbFXMcTwCmoh+RiqyCUCCtS\nxnNUNUNEFgMpqvquiNQM3rfnyXv3gqYDP4jIdzjTKGcDU9yNFBls4tx4joiMAeoBjVX1TBEZBiSq\n6j3uJju+4IWcbXBGUEtVdaO7iSKDTZwbL2qnqtcAGQCqOgw4w9VERRCR1sCzwJPBj9Ei0tzdVJHB\nTveMF5UNXsSZByAiVQGvd115HRgCfIczP9UJ51YZTxdXP7AiZbxoLDAfqCcin+CsiHCfu5GKlKqq\n/w3Z/khE+rmWJoLYnJTxJBGJxblJdx/wk6rudTnScYnIC0A08AXONEoXnNHfhwCqaku2/E42kjKe\nE+z8+w+gEs6pEyKCqvZwNdjxxQU/X3rE/l7YulInxYqU8aJxQH9gs9tBiiu4jlShRMQu6DwJVqSM\nF61T1c/cDlGCxO0AfmZFyniRisi/gLk4S544O/3baDPgdgA/syJlvGhX8KOy20FKiL07dRKsSBnP\nUdXhxzomIh+o6l/Cmce4y644N36TUPRTPMdO906CjaSM33jy1ElE6uJccNoEJ+NqYJyqbgPOdzOb\n31mRMqZkTAfeAabhjJw6Au8DnVT1gJvB/M6KlDElI1tVXwzZXiQiF7uWJoJYkTJ+k+Z2gGNYFFxC\nOPS2mB9FpBmAqnq9JZdn2b17xnOCy54M5fD5ncdVdZWrwY5DRL46zuE8j9/S42k2kjJe5LtlT1S1\nu9sZIpUVKeNFvlv2RESSKXjnsSwQD6xX1dPcSxUZ7HTPeE4kLHsiIq2APqr6sNtZ/M4u5jReFAdU\nwFn25BKgIlAOZ9mTq1zMVWyquhznNNWcJDvdM140tLCdqvpruIMUl4j8m8MvNK0F7HEpTkSxImW8\n6H0KfuDLAQ2BxUA3twIVQ+g1Unk4TSSWuZQloliRMp6jqu1Dt0WkBvCES3GKaxnOQn1tcBqDLgJ+\nwbsdl33D5qSM56nqdqC12zmK8CbO6OlxYDSQg3MphTlJNpIyniMi31NwuhcAqgOfu5eoWOJV9dmQ\n7fki8oVraSKIFSnjRaHv4OUBGaqa7laYYooWkXaqughARM7CzlRKhBUp41XDCZnfEZGhwWVPvOpO\n4PngvXp5wMrgPnOSrEgZL3oVmAjcj/PuXrfgPi+vKtBeVc91O0QksiJlvChaVd8P2X7X67fFAOeL\nyHeq+qPbQSKNFSnjRftFpBcwG2fivAdOJ2MvawesFJE9wP7gvjxVTXIxU0Swe/eM54hIbZy38tvh\nzEl9D3h9TsqUEhtJGS+6QVVvdjvEiRCRWYXszgHWAqNUdUN4E0UOK1LGi5JE5DycEdShUydUNcu9\nSEWaC5QHPsJ5d++i4P5VOBd12npTv5MVKeNFlwBXHLEvD+cePq/qcsTCd/NEZKaqDhaRO1xLFQGs\nSBnPUdUmbmf4HcqLyL3AtzjzaO2BqiLSEeu7d1KsSBnPEZF1hew+NL8zQFUXhzlScfTC6bs3DKco\nrQWuxrnO61r3YvmfFSnjRS8D6RTM71wMVAO+AsYD57gX7Zi241wysY2C5hFrVdXePj9JVqSMF12k\nqn8K2X5FRGap6kgRcS1UEf6FM9r7PrjdD/g7cI1riSKEFSnjRdki8hyHz++UC77j59X1mWqoaufQ\nHSLytVthIondpW286CqcOZ1uwLlAMnA5oHh3ZLJQRPIX6xORMygYVZmTYCMp42V5OCOpLGCfqqa6\nnOcoIa2sAsC9IpId3K4AbAEedDFeRLAiZbzoA+AH4Jvg9tnADOB81xIdg6pWcztDpLMiZbyo7BH9\n6v4tIp5emTPYZv2od/KsvfrJsyJlPENEYoIP54SsgpCH0xzU65PQd4U8LotzmUQll7JEFFsFwXiG\niKynYH7nSHmq6uXbYo4iI
p+p6gVu5/A7G0kZz1DVU93O8HsVcn9ereCHOUlWpIznhIyoQuWqamM3\n8hRT6AR6Hs5lE5e4lCWiWJEyXtQi5HFZnDkpz15qHjQO54JOFZFuOE0k9robKTLYnJTxheBtMZ59\np0xEPgGexhlBvYFTtP6mqn92M1cksJGU8RwRGcPhp3u1gHiX4hRXeVWdLSLDgedU9R0R+bvboSKB\nFSnjRStDHufh3MNX2PK8XnKKiFwH9AbaiUgD7BKEEmH37hkvmgWsUdU3cYpUd7z/TtkdQAfgdlXd\njTNpPsjdSJHB5qSM5wRXD7gXOAV4ChgMDPHrNUci8oGq/sXtHH5lIynjRQdVdSnwV2Ccqn6Lv6cm\nEtwO4GdWpIwXlRGRgcBlwMzgEihxLmc6GXa6chKsSBkv6oOzPMuVqpqN0yXmNgARKe9mMBN+fh5C\nmwilqpuA50K2p4cc/gSn7br5g7CRlPEbP7aHSnM7gJ/ZSMr4jSfnd0SkIs5yLUmq2l9EugNLVDVd\nVf/qcjxfs5GUMSXjDZwR06F1zpOAd1xLE0GsSBm/8erpXryqTgT2Q/48WgV3I0UGK1LGb1a7HeAY\nokSkEcHTURG5EIh2N1JksCvOjeeISB1gCFBZVXuJSG/gO1Xd6HK0YxKRpsALOLfG7AGWAf1V9UdX\ng0UAmzg3XvQK8DzwaHB7B86cT3e3AhXDucD1qrrN7SCRxk73jBdFq+onOD33UNVZeP//aiLwsYjM\nEZH+wdGgKQFe/4c3f0wHRKQHEC0i1UXkNjy+yqWqPq6q7YBrgQPAZBGZ63KsiGBFynjRzTg/7FWB\nz3CW4vX8AnLBa6U6Bj9qAkvdTRQZbE7KeNEDwCuqeovbQYpLRL7EKUz/BV5U1fkuR4oYVqSMFy0H\nHhKRZjgjqfdU1eunTv1VdYXbISKRXYJgPCu44kFP4G/An1S1nsuRjnJoQTsRSebwW3YCOA1Nk1yK\nFjFsJGU8KXjd0aXBjzxgvLuJChey4mbb4OoN+YIjQXOSbCRlPEdEFPgV+AD4wMvXHolIVaA68Bpw\nEwW37ZTBOU1t4lK0iGEjKeMZIlJeVfcB5wEpIftjAFQ1y61sx9EU6As0AV4K2Z8LTHUlUYSxImW8\n5HWcSw++pmB+59DIJA9nhU5PUdU5wBwRmaaqX4QeE5EbXYoVUaxIGc9Q1WuDD69W1e9DjwUv7vSy\ndBH5N1AluF0OqAG86V6kyGBFyniGiDQGBHhKRB7l8Pmd8UADl6IVxwvAAJxW67cDfwHsWqkSYEXK\neEkFoB3OgnFXh+zPBYa5EegEZKnqVyKyT1V/AH4QkU9xLu40J8He3TOeIyLNVXWV2zlOhIh8DLwM\nXAVsBtYCD6iqXYZwkmwkZTwjpNPvbBHx24WR1+LMQd0F9AdaAde7mihC2EjKmBIgIjcUsjsHWGv3\n8Z0cG0kZzwn+wJcF3gI+xnnH7FVVneRqsOM7F+gCfIlzuUQ34Hugioj8rKp3u5jN12ypFuNFt+O8\ndX81sEJV2wO93I1UpCpAC1Xtp6q3Aq2BU1T1QpylZszvZEXKeFGOqh7EmYSeFtx3iot5iqMeEBOy\nXQ44TUQSgDh3IkUGO90zXrRYRH4BVFWXisjdOPfyedkYYImI7MI53UsERuCcBj7rZjC/s4lz40ki\nUllV04KP6wHbVPWAy7GOS0QCOKuJBoBUVc1xOVJEsNM94zki0gb4QEQ2ichWnO4xjV2OdVwi0gJn\ngb6PVHUHcLeItHU5VkSwImW8aDzOhZB1VbUWTg++CS5nKsoLwL1AdnB7Jh5dA8tvrEgZLzoYvLUE\ngOB1Rl6flzioqmsObajqaoItuczJsYlz40XpIvIQMBtnfqcHsNPVREVLF5G+QKyInIVzg/EOlzNF\nBBtJGS+6CeeSg4HAYzj/T73e0urvQC2cxfoeBdJx/h7mJNlIKsIFV7XsqaofBbevB2ao6h5
3kx1X\nFrAMZ34nD1gV3OdZqpopIh/hLNgXhZO7LfCNq8EigBWpyPcuzq0ah1QA3gEudydOsbyDc5o3P/j5\nFuBGoLeboY5HRP4HVMZZASF0NVErUifJilTkS1DV5w9tqOoUEfmbm4GKoY6qdgrdISJe/2GvfGRm\nUzKsSEW+DBG5C/gW5zTkXGCXu5GKtFBE2h9aQlhEzsC5WdfL5vpxHSw/sCvOI5yIVAIexJkfOYjz\nwz5eVTNcDXYcIrIZZxI6E4jGOUVNDR725LpSIvIzTqOIXTivsx/WwPIFK1IRTkT+FHwYOk+Cqnr9\n9KlQInKFqv7H7RwnQkQuV9UP3c7hV3a6F/lC1zEqC5wBLMK/E7r3AL4qUjhXoluR+p2sSEU4VT1s\nHabgJQmvuhSnJASKforn+DGzZ9jFnH88uYCfmwP4cX7Cj5k9w0ZSEU5EknF+SA79Ns8FJrqXyJgT\nY0UqwqlqtSP3iUhPN7KUED+eOvkxs2dYkYpwInIqcAeHt//uCtR1LdQxBBe3OyZV/RWPrXLpx8x+\nY0Uq8r0JvI7TC+5xnNthbnU10bG9j3NqWg6n3fo6nOukTgWWAGer6sfuxSuUHzP7ik2cR74Dqvo6\nkK6q76vqDRx+WYJnqGp7Ve0ArABOU9Xmqno60AT40d10hfNjZr+xIhX5AiLSFUgVkVtF5Fyc3/Je\n1kRVNx/aUNWNOD/0XubHzL5gp3uR73qgJs5FkI8Dl+DcJuNlC0RkIbAA51TqTGC5u5GK5MfMvmC3\nxfyBicgHqvoXt3MURkSaUnA9l6rqSjfzFIcfM/uBFak/MBH5SlW7u53jSMFuMTcAlQh5+15V+7oW\nqgh+zOwXdrr3x+bV31DTcDqtbC7qiR7ix8y+YEXKeNEmVZ3sdogT5MfMvmBFynjRDyIyBpiDszYT\nAKr6f+5FKpIfM/uCFak/tjS3AxxDreDn0En9PMDLP/B+zOwLVqQinIi0BobiXLOTB6wGHlfVVar6\nV1fDHdtQtwP8Dn7M7AtWpCLf6zhtyr/DedepEzAVZ/E7rzp0qwk4t5s0BBYD3dwKVAx+zOwLVqQi\nX6qq/jdk+yMR6edammJQ1fah2yJSA3jCpTjF4sfMfmHXSUU4EXkB54bXL3Bug+qC0x34Q/DPxK6I\nLAzeI+cbfszsRTaSinxxwc+XHrG/Fx6d2BWR7yk4dQoA1YHP3UtUND9m9gsrUpGv0And4DpHXnVV\nyOM8IENV090KU0x+zOwLVqQin18ndIcDbXCWO14kIkNVdZvLmYrix8yeZ0Uqwvl0QvdVnHXY78cp\nrN2C+y52MVNR/JjZF6xI/cGo6vbgtVNeFq2q74dsv+v1dyTxZ2ZfsCIV4Xw6obtfRHoBs3Ey9wD2\nuZqoaH7M7AtWpCKfHyd0++Is0DcIZ37ne+BmVxMVzY+ZfcGK1B+D3yZ0b1BVv/2A+zGzL1iRinx+\nnNBNEpHzcEYj+w/tVNUs9yIVyY+ZfcGKVOTz44TuJcAVR+zLw7l8wqv8mNkXrEhFPt9N6Kqq77qs\n+DGzX1iRiny+m9AVkXWF7M4B1gIDVHVxmCMVyY+Z/cKKVOTz44Tuy0A68BHOKdPFQDXgK5x1xM9x\nL9ox+TGzL1iRinx+nNC9SFX/FLL9iojMUtWRIuJaqCL4MbMvWJGKfH6c0M0WkeeAb3FOUdsB5YLF\nNtPVZMfmx8y+YEUqwvl0QvcqnB523XEm+9cCl+EsO3ONi7mOx4+ZfcGKVITz6YTuXar61KENEUkC\npqjqVcf5Grf5MbMvWJGKfH6c0I0TkbeAW3AW5xsEDHM1UdH8mNkXbPngCCci3xwxoUtwQreHiMxV\nVS8WKUTkKmAUsAroq6qpLkcqkh8z+4EVqQgnIjNxfmgOTei2x1nnfDjwgKpe6GK8wwSba4b+h2wB\nNAD+C6CqD7sQ67j8mNlv7HQv8h2a0O1GwYTu5UAs3pv
QXXnE9ipXUpwYP2b2FStSfxx5OCOpLGCf\nF09FVPVNABGpBVyqqpOD248Bb7gY7Zj8mNlvotwOYErdB0A94BtgDnAaMMPVREV7k8NbwC8P7vMy\nP2b2BStSka+sqj6sqv8OfjyAc9rnZRVU9V+HNlT1fzjLzHiZHzP7gp3uRSgRiQk+nBOyCkIezqT5\n127lKqaNIvIMzmR/FM7KDRvdjVQkP2b2BStSkWsVTlEKANcecSwPGBH2RMV3Y/CjJ86Fp/OBd11N\nVDQ/ZvYFuwTBeJKINAeqBDfLA8+qaksXIxXJj5n9wEZSEU5E1nP4dTwAuara2I08xSEik4CmwOnA\nQuBMYLSroYrgx8x+YRPnka8F0DL40Ra4F5jkaqKiNVfVrsAaVb0UOAto5nKmovgxsy9YkYpwqron\n5CNdVT/G200YAMqISEUAEammqpsArzc09WNmX7DTvQhXyG0bNYF4l+IU1ws4V8O/AGwWkR14v6Gp\nHzP7ghWpyLcbp/HCdpxidSFwnauJilYeGALsAn4B6gJfuJqoaH7M7At2uhf5zgU+AxS4CbgdeMbN\nQMXQH2itqi1UtTnQCPD6jbp+zOwLVqQi30FVXQr8FRinqt/i/RH0Zpw1sA5Jwbkx2sv8mNkXvP6f\n1Zy8MiIyEGcp28Ei0h5nSVvPCZk/2wssEZG5we2OwI9uZjsWP2b2GytSka8PznItV6pqtog0BG5z\nOdOxHFr25MjlTr4Pd5AT4MfMvmJXnBtjPM3mpIwxnmZFyhjjaVakjDGeZkXKGONp9u6eRzWoFJv3\nyy2eaeRyXI1f+ZTcfQdY0eF0t6MUW+uNziVNG1YvdznJCYpN8PqqqiXORlLGGE+zImWM8TQrUsYY\nT7MiZYzxNCtSxhhPsyJljPE0K1LGGE+zImWM8TS7mDPCRF12E4H6TYA8cv7zGmwKrrtWMZHo6+4t\neGKV6uT+byp5S+YS6HYZUW3/BDk55MyYUvA1YXDKXQ8R3awVkEf2+KfJ+bFgxZNAUnVihjwNZcuS\n89MasseOgECACg8MJqphYzhwgL1jnyD31w1hy3ukn35Zyx33P8RN1/6NPr17HXZs3oKFPPviRKKj\novjTOZ24s9/NLqX0NxtJRZKGzaBaTXJeGEDO9JeIviLkhyJjJzkThzofk4dDegp5qxZB9bpEtelM\nzriHyXlvElHN2oUtbnTrM4mqU489d1zP3qeHcso9jx52/JQ7HmTf9LfY84/rIDeXQFINypzTHeLi\n2HPHDc7X3PFA2PIeKWvvXp4Y/Qwd2xf+mo0YPZYXxozin6+/zLffLeCXdevCnDAyWJGKIFGntSJv\n5UJnY8cWiImD8hWOel6gfXdyl8+H/dkEmp1J7rJ5kJsLW9aT+9n0sOUtc+ZZHJjzFQC5G9cTiK8I\nMbHBkAHKtG7LwW9nA5D93FPk7dhOVJ365Kxx1pnL3bqZqBq1IMqd/8blypbl5fHPkVSt2lHHNm3e\nQqVKFalZozpRUVF0PacT3y1c5EJK/7PTvWMI9lCbAZwCzAKuBwJAC1XNFJFncFZlfBuYAjQEygJD\nVHWWiDQDXsRZSnY3ThOEBGAqkAm8qKr/LdHQ8QmwOeRULXOXs2/f3sOeFnVWT3ImPw5AIDHJGaX0\nGwTR0eR++AZs21iisY4lKrEqOT+tzt/OS08jKrEquVl7CCRUJi9rj3M62KQpB5cvZt+U8eSu+5ly\nV/dh/7+nElW7HlE16xColEBe2s6wZA5VpkwZypQp/EcoOTWVxMqV87cTExPZtGlzuKJFFBtJHdv1\nwFJVPQdYjVOgCnMtsE1VuwNXAOOC+18A/qGq5wIzgTuD+88ArivxAlWYQCGR6zchb8eWwwtXVBS5\nL48g97PpRF99R6nHOqbQuIEAUVWrs/+9aey5py/RpzWlzNldOLhgLjlrVhL7wuuU69WHnI3rCv97\neo2tgPu72Ujq2Jo
Cs4OPZx/7aXQCuojIOcHtCiJSDugAvCwi4PRkO7Tm9VpVTS3xtAAZO52R0yEV\nE2F32mFPiWrWjryfQu78z9zlFC2A9T9C4tGnLqUlNzWZqMSqBdmqJpGbmgxA3q50cn/bSu5WZ/Rx\n8IcFRJ3aGObPYd8rL7Iv+DVx//yfK6OooiRVq0pKSsE/8287kgs9LTRFs5HUsQUo6Px7MPg59Ndh\n2eDn/cCTqtot+HGaqu4HsoDuwX0dVfWekOeXityflhFo3dHZqH2qU7T2ZR/+pLqNyNu6oeBr1iwm\nIG2cjaTakF469bMwB7+fR5mu5wEQ1aQpuSk7YG+WczAnx5lzqlMPgGhpSu6m9UQ1akKFR4YDUKZD\nZ3J/WuPJUUqdWrXI3LOHzVu3cvDgQb6aM5fOHc9yO5Yv2Ujq2H4EzgLeB3oG92UANUVkHXA2sARY\nAFwO/FNEkoD+qjoAWIbTLfgTEekNJFPafdg2KGxeR/TdT0JeHjnvv0ygfXfYuyd/Qj1QsbIzV3XI\nrz9D07ZE3/0UADkzXi7ViKFyVi4j56fVxL70FuTmsve5pyh74WXk7cnk4JxZZL8wmgqPPQFRUeSs\n+5mD337tfGFUFLGTp8H+/WQ98ejxv0kpWrl6DU8/N54tW7dRpkw0n305ix5du1CnVi3O69GNYY89\nwgOPDQbg4vN7cmr9eq5l9TPrFnMMIpIAfABEA3Nx5p6eBB7A6QacCnyDMxE+CWgWfO4wVf1ERJri\nTKjn4vRkuxaoCLynqkW+z2+L3pUuW/TOP2wkdQyqmg50BxCROOBaVX0ZKGyocUshX78G6HLE7p1A\n+C5EMiYC2JyUMcbTbCRVDKqaCTRwO4cxf0Q2kjLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSlj\njKfZJQheFVeRqL/fW/TzvODdeUTFB6j45ltuJym+LhcDkLctfKuQloRA4zPdjhB2NpIyxniaFSlj\njKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKfZxZwRZuTr01n20zoCgQAD+l5Dy8an\n5h/7cuFSJr33X8qVLcvFndtz3cU9ABjz1nv8sOZncnJy6HflxZx/dtvw5Z38JkvX/EwgEGDgbTfS\nUhoX5P3ueya+M8PJ260TfS67kL3Z+3hs7EukpO9i//4D3H7tlXQ/K7wXOI6c8jZL9WcCBBj4jxto\n2aRRSOZFTJz+H8qVLcPFf+pIn0svcDI/N6kg89/+QvcO4XuN/c6KVBFE5CachqAPFnKsHlBDVReK\nyDjgeVVdH+6MhyxcpWzc9hvvjnyMtZu3MXDCG7w78jEAcnNzGfHKO7w/ZjAJ8bHcOuJ5zj2rDRu3\n7uDnX7fw7sjHSNudyV8ffCI8WTlQAAAgAElEQVRsRWrh8tVs2LKd6eNGsPbXzQx4dhLTx43Iz/vE\nhNeZ8eIoEirG0W/QSHp2bM/i1UqLJg25pdflbPktmb4DngxrkVq4Yg0btm5n+tjHWfvrFgY8P5np\nYx8vyDzpDWaMf4qE+Dj6DX2anh3bsXj1T7Q4rSG3XHUpW3Yk03fgSCtSJ8CK1MnpAcQBC1W1v9th\n5i//kXM7nAFAozo1ycjMIjNrL3ExFUjbnUl8bAyJleIBOLtVU75btobLunak5WnOaKtiTAxZ2fvI\nycklOrr0ZwLmL11Jz47Oku+N6tUhI3MPmXuyiIuNIS1jN/FxMSQmVASg4xktmLdkBVee3y3/67cn\np1KjamKp5zx25tpO5qws4mKCmWNjSawUzNy6BfOWrOTK87q6mtnvrEgVk4g8i9Pw8xSc7jAfAsOA\nAyLyK3A/cBdwFVAJEKAR0B/4BaerTEfgVGA60FFVD5RkxpT0XTRvVD9/O7FSHMnpu4iLqUBixXj2\n7M1mw9bfqJ1UhYUrlfbNmxAdHUVMdHkA3v9yLn9q2zIsBQogOS2d5qcVnI4mVqpIc
lo6cbExJFaq\nyJ6sbDZs2Ubt6tVYsGw1HVo1y39u7/sG81tKKpOGPxKWrIdlbnxk5l3ExQQz791bkHn5ajq0bFqQ\n+YGh/Ja6k0lDHwprZr+zIlV8G1T1fhGpgNOF+BUReQNIUdWPROT+kOfWVdWLReRC4DZVvUJEPgH6\nAhcA95Z0gSpMaLeyQCDAyLv/zqCX3iAupgK1k6oe1ur0y4VLeX/WXF4Z7N6AMLS9WiAQYNSDdzDw\n2UnExcZQp0a1w46/+9wTrFm7gYdGv8iHE0cTcKnV+lGZ77+dgc9PIS4mhjrVqx32b/Du2OFO5mcm\n8OGLo1zL7DdWpIovUUTm4XQgLqpf9tzg5804oyqAkcC3wDJV/bY0AiYlJpCSXtD4c8fOdJIqF7Rd\n79BcmDrCGXk8O3UGtZKqOGGXrGTy+/9jyqD+xMfGlEa0Y+StTPLO9JC8aVRLrFyQt1Uzpo11uhWP\nfe0dalevxsqf11EloSI1q1WlaaMG5OTmsHNXBlUSKh3155dK5iqVSQ59jVPTqBb6GrdsyrTRQ53M\nb7xL7epVg5krUbNaFSdzTm5YM/udXYJQPGfizD91VdVuwL4inn8w5PGhX5cxwcfVSzxdUOfWzfjs\nux8AWLVuI0mJCcRWOCX/+K0jnid1VwZZ2fv4atEyOrVqyu49WYx56z0mDribhPjY0opWeN4zWzFz\n7gIn78/rSEqsTFxMhfzj/QaNJDV9F1nZ2cxe8AOdzmjJohVreP39/wKQkpZO1t5sKleMD1/mM0Iy\n/7KepCpHZB7ydEHmhYvp1KYFi1b+yOsf/C+Y2TkWzsx+ZyOp4mkAzFPVAyJyGRAtIuVwuhMX9zUc\nCQwFLhKRa1R1ekmHPOP0xjRvWJ+/DRhFVCDA4H7X8sGsb4mLrcB5Z7Xlqp5duOXx5wgEAtx65UVU\nrhjPv2Z+Q9ruTO4bOzn/zxl1d19qVatS0vGO0raZ0Py0hvS+bzBRgQBD7urLjJmziY+N4bzOHeh1\nYQ9uHvCkk/eaK6hcqSK9LzmPgc9N4roHhpK9fz+D77yZqKjw/a5t26wJzRufSu8HhhIVFWDI7X9n\nxudfO5k7tafXBd25edAoAgG4tddlTuaLezLw+Slc9/BwsvftZ/DtN4U1s99Zm/UiBC9BaA10xmmX\n/h+gE5ABvAu8CTwE9KNg4jxFVV8UkRbAi8AjOO3XLxKRKsDXQCdVzTjW921Qu2beus9KvI6VioYX\nXAOBAOvn/J/bUYrt1OCid+tnf+xykhMTaHzmH24iy0ZSRVDVNwrZ/VzI41rBz9OCn1eGfO1KoFtw\n86LgvlSgRYmGNCaC2ZjTGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjjaVakjDGeZkXKGONpVqSMMZ5m\nF3N6WKBsObcjFF8gANFl3U5x4vJy3U5gimAjKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKdZkTLG\neJoVKWOMp1mRMsZ4mhUpY4yn2RXnEWbkK9NYqmsJBAIM7HcdLU9rmH/sy/mLmfivjyhXtgwXdzmL\nPn8+D4Axr09n0WolJyeXW6/6M+d3ahe+vBNfY+man5y8d/SlpZxWkHfeQiZO+zflypbl4m7n0OeK\ni/OPZe/bx6X9+nP7db248oIeYcsLMPLlqSz98Rcn8619aNmkUUHm+T8wcfp/nMxdzqbPpeezN3sf\nj42bTEp6Bvv3H+D23lfQPdhp2hTNRlLHISI3icgzxzhWT0Q6BB+PE5FTC3teOC1c+SMbtv7G9DFD\nePLumxkxZWr+sdzcXJ6Y8jZTht7P1JED+Or7pWxP2cn85Wv4+dfNTB8zhJeHPcjIV6Yd5zuUcN5l\nq9iwZRvTx4/iyfvvZMSEVw/P++LLTHlyEFOfHcFX8xexPTkl//jEae9RKT4ubFnzM69Yw4at25k+\ndhhP3nsLIya/fXjmSW8yZdhDTB01iK8WLmF7S
ipfLVxCi8YNmTpqEM89ejejwvgaRwIbSf1+PYA4\nYKGqutf2N8T8ZavpeXZbABrVrUVGZhaZWXuJi6lAWkYm8cH25QAdWzVj3rJVXN6tM62aOKOtirEx\nZO3bR05Oblharc9fspyenTo4eevXISNzD5l7soiLjSFtVwbxsbEkBhtodjyjJfMWL+fKC3qw7tfN\nrP11E13POrPUMx6Vedkqep7tfN9GdWuTsWcPmVlZxMXEkJax+/DXuHVz5i1dxZU9/5T/9duTU6lR\nNTHsuf3MilQxiMizQAfgFGAS8CEwDDggIr8C91PQzqoSIEAjoL+qfiIiDwSPRQH/p6rDSyNnclo6\nzRs1yN9OrBRPctou4mIqkFgpnj17s9mwdTu1k6qyYMUaOrRsSnR0FDHR5QF47/Ov6Xpm67AUqPy8\nIadKiZUqkpyWTlxsDIkJldizdy8bNm+ldo0kFixbSYdWzQF4evIbDL6rHx98/lVYch6eeRfNGxcM\nmhMrHnqNneK0Z282G7Zsp3b1qixYsZoOLZvmP7f3g8P5LXUnk4Y8EPbcfmZFqng2qOr9IlIBWKuq\nr4jIGzj99T4SkftDnltXVS8WkQuB24BPgvvPwWkmuk5Enjtez72SEtpTMRAIMKp/PwaOf5W4mArU\nqV7tsONfzl/M+198w6vDHyrtWMd0VN6H7mHg2AnExcZQp0YSecB/Pv+KNs2EOjVLrRH0CQntWhkI\nBBh13z8Y+PwUJ/MRr/G7zwxlzbqNPDR2Ih++8BSBwB+uhd7vYkWqeBJFZB6wH6hWxHPnBj9vxhlV\nAWThNAQ9CFQFEnGai5aopMTKJKfvyt/esTOdapUr5W93aHE600YNBGDsm/+idlJVAOYsXsGkf3/E\ny8MeJD42pqRjHTtvlcok70wryJuaRrXEygV5Wzdn2nNPOnlfnUrt6kl88e18Nm37jdnzF7E9JZVy\nZctSo1oVOrVtHZ7MiQkkp6UfnrlyQkHmlk2ZNnqIk/mN6dROqsbKX9ZTpVJFalarQtOG9cnJyWHn\nrgyqJFQ66s83R7OJ86KdiTP/1FVVuwH7inj+wZDHARGpj3M6eGHw6zeWRkiAzme0YOa33wOwau0G\nkhITiIupkH+837BnSE3PICt7H7O/X0qnNs3ZvSeLMa+/y6TB95MQ5onozme2Yeac75y8P68lqUrl\nw/MOeILUtHSy9mYze/73dGrbiucGPch7E8Yw/YWnueqintx+Xa+wFSiAzm1bFrzGv6w/OvPQ0aSm\n7yIrO5vZC5fQqU0LFq38kdc/cLo7p6TtIit7H5Urxocts9/ZSKpoDYB5qnpARC4DokWkHM6pW3Fe\nv6rADlXNFJG2QH2gVFaza9v0NJo3bkDvh58gKhBgyG03MOPLOcTHVOC8ju3odX43bh46hgBw61V/\npnLFeKZ/+hVpuzPpP3pC/p/z9H23UqtaldKIeHje5qfT/LRG9L73MSfv3f2Y8dks4mNjOO+cs+l1\n0Xnc/OjjBAIBbu19JZWDE9Juatu0ifMaPzicqKgAQ267kRlffOO8xp3a0+uC7tw8+Gknc69LqVwp\nnt4XncvA8S9z3cOPk73/AINvu5GoKBsfFFcg9JzZHE5EbgJaA52BvcB/gE44p2rvAm8CDwH9KJg4\nT1HVF0WkBfAicC7wfzjvBM4FooE2qtrzeN+7Qe2aeetnfVAKf6uSd2qPv0BUFOu/nel2lGI7tZNz\njdj6rz50OcmJCZzW/g83kWUjqeNQ1TcK2f1cyONawc+HLnxZGfK1K4Fuwc0LSjqbMX8UNuY0xnia\nFSljjKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKfZxZxeFQhA+QpFP88LAgHIy4MD\nRd3W6CGHViCIdf9WG3N8NpIyxniaFSljjKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSlj\njKfZxZwRZ
uSkN1j6488ECDDw9ptoKY3zj30573sm/nOG02a9a2f6XH6h0wL8mQmkpO9yWoBf+1e6\nnx2+ppsjp7wVbFkOA/9x4+Ety79bxMR3P3BalnftSJ9LL3DyPjuxIO/frqT7WW3Dlhdg5IRXWLpa\nnTbrd/Wj5ekhreHnzmfi1H85mXt0oc9f/syCpSvoP+xpGjeoB0CThvUZfM8/wprZz6xIHUdwjfMW\nqvpgIcfqATVUdaGIjAOeV9X14c4YauHy1WzYsp3p455k7a+bGfDsRKaPc1pC5ebm8sSE15gx4WkS\nKsbRb9BIenZqz+JVSosmjbjl6svZ8lsyfR8bEbYitXBFMO+zj7P21y0MGDeZ6c8+XpB34uvMeGEk\nCfFx9BvyND07tmPxqp9ocVpDbul1mZN34FNhLVILl65kw+atTJ8whrUbNzFg9HimTxhTkHn8FGZM\neY6EivH0e2Q4Pc85G4D2rVswfvijYcsZSaxI/X7ea7O+ZAU9O7UHoFG9OmTsDmlbnrGb+LgYEhOC\nLcDbtGDekhVceX63/K8Pdwvw+UtX0bNju2De2k6b9cNalsce3rJ8yUquPK9rQd6U8Lcsn794WX7h\naVS/Lhm7Mw9vDR8X0hq+bSvm/bCM2jWSwpox0liRKoYSaLN+dfA5B4EfVPXe0siZnJZO89Ma5m8f\n1rY8vwX4NmpXr8aCZavo0KpZ/nN79x/EbympTHo8fL/tk9PSD29ZXime5J2hLcv3FuRdvvrwvA8M\n4beUnUwaFt6Oy8k70w5vDZ9QieSdaQWt4bNCWsMvXUGHNi2pXSOJtRs3cfvAEezK2M2dN/amc7sz\nwprbz6xIFc/vbrMuInOAp3DaWGWKyMci0l1Vvyrt0Hkc0bb8wTsZOHbiYW3LD3l33AjWrN3AQ6Nf\n4MOJY1xpAR7aXS0QCDDqgdsZOG4ycTEx1KlxRMvysY87ecdM4MMJT7vWsvyo1vCP9mfg6PHB17g6\neXl5NKhdiztv6M1F3c9h09bt3Hj/QD6bOplyZcu6ktlv7N294jnUZv0TTrzNehPgZ1XNDO6fDZTK\nr9GkKpWPbgEe2ra8VTOmPfs4k594lLjYGGpXr8bKn9exbUcKAE0bNchvAR4OSYlH5N2ZRrXE0Jbl\nzZg2ZhiThz9MXExI3uTUkLy5YcsLkFQ1keSdoa/xTqpVCXmN27Rg2vhRTB45hLi4GGrXSKJ6tSpc\n3KMLgUCAerVrUjWxMjtSUsOW2e+sSBXtpNqsA3nBz4cc6n5c4jq3bc3MOfMBWPXzuqNbgA98qqAF\n+Pwf6HRGSxatWM3r7/8XgJRgS/NwtQDv3LYVM+cucPL+sp6kxCPyDh4V0rJ8cUHL8hkhebPDlxeg\nc7szmPnNt07mn9aSVCWRuJiYgsyPDCtoDT/vezqd2YaPP5/Nq9OdRq/JO9NITUsnqWrpd4iOFHa6\nV7QGnFyb9Z+A00QkXlV3A12BEaURtG1zoflpDendf5DTAvzOm5kxc7bTtrxzB3pddC43PzYi2Lb8\nCipXqkjvS85n4LMTue7+IWTv38/gu24OWwvwts2aOHkfGEJUIIohd/ydGZ9/7eTt1J5eF/bg5kEj\nnbbwV1/u5L24JwPHTea6h4aRvW8/g+/4e1hblrdt0ZTmTRrT+66Hndbw997GjE+/dDJ36UivS87n\n5oeGEgjArddeReVKFeneuQMPjhjLrG8XcODAQYb2v91O9U6AtVk/jpJos66q3UTkSuABnMI2V1Uf\nK+p7N6hTK2/93E9K/i9VCk495yIA1n/9X5eTFN+p3S4FYP2CWS4nOTGBWvKHa7NuRcqjrEiVLitS\n/mFzUsYYT7MiZYzxNCtSxhhPsyJljPE0K1LGGE+zImWM8TQrUsYYT7MiZYzxNLstxqsCAShT3u0U\nxRMIwMED5G5b53aS4gs4v58Dp4Tvvj/z+9hIyhjjaVakjDGeZkXKGONpVqS
MMZ5mRcoY42lWpIwx\nnmZFyhjjaVakjDGeZhdzRpiRL73C0tU/OW3L7zyiBfi3C5g4LdgCvHsX+lxxidMC/PHRBS3AT63P\n4LtvDVveUe/+j2VrNxEIBHjsb5fQ8tQ6+cfemTWfj79bSnRUFM0b1Oaxv11C1r79DHj1PVIyMokp\nX44n+/6VapXCe0HmU+MmsGzVagIEGHDfXbRqdnr+sS++mcvEN6ZSrmxZLunZgz69/sKerL088vhI\ndu3ezYH9B7jz5hvocnaHsGb2MytSRSis1bqIbAjuyzzGlx35ZzQA3lPVdqUQMd/CZSvZsHkb018c\n7bQAH/MC018cDQRbgL8wmRmTgi3AHxtOz85nAdC+VXPGDwt/C/DvdT0bf0vlnwNvY+3WHQx6fQb/\nHHgbAJl7s3nt0zl8OvJ+ykRHc8vY11m29leWrt1E3WqJjLvjWhb9tIEX//MFw2/8S9gyL1y8lI2b\nNjP95Qms3bCRAU+OZvrLE4Dgazx2PB+8MYWEShXpd/8j9Ox6Dl98PZdT69XlgTv68VtyCjfedT+f\nTn8rbJn9zk73Isj8xcvyC0+j+nXJyHRagAPBFuBxJCZUIioqio5ntGbeD8vcjMv8NWs594ymADSq\nlURG1l4y92YDULZMNGXLRJO1bz8Hc3LI3n+ASrExbPwtJX+01a5JA374eWNYM3+3aDE9u57jZG5Q\nn10Zu8ncsweAtPRdVIyLI7FyAlFRUZzdri3zvv+BygmVSM/YBUDG7t1UDrZhN8VjI6niOVVE/g+o\nCzx3aKeItAYmAAdwOsH0UtWdIvIwTueYXOAxYH3I11wE3A1cqqo5JRkyOS2d5k0a528nViqiBXjr\nFgUtwAeNYNfuTO68vjed27UpyVjHlLJrN83q18rfrhwfS8quTOIqnEL5smW547IenP/IWE4pV4aL\nOrSiQY2qNKlTg29W/MT57Vrwva5nW2r6cb5DKWTeuZPmpzfJ306snEBy6k7iYmNJrJzAnqwsNmza\nTO2aNVjww1I6tG3Drdf/jRn/+5TzrrqOjN2ZTB47MqyZ/c6KVPE0AdoCFYFlwKHikgTcrapLRORx\n4DoR+RSnQJ0NNAQeJdhnT0QaA4OBi0q6QBXmqDbrj9zLwDEvOC3Aa1Ynj5AW4N3OYdO27dz4wCA+\ne2uSO33hQhoXZe7NZsr/vuaTp+4jtkJ5+o55jR83bePKLmeim7fTZ+QU2kkDEivGhT9niKParA9+\nlAFPjiY+NpY6tWpAXh4ffvo5tWok8eq40fz48y8MeGoMM16f7GJqf7EiVTxzVfUAkCoiGUC94P7f\ngKdFJAaoBUzDaaG+QFVzgV+AW4JzUrE4fftuUNVdpREyqUoiyTvT8rePagHeugXTnnd+i4995S1q\nVw+2AO/eBYB6tWpStXJldqTspE7N6qUR8TDVEiqSsqtgWm9Hekb+JPjabcnUrVaZyvGxALRtUp9V\nG7Zwet2aDL3+cgD2ZO9j1pI1pZ4zVFLVqqSk7izInJJKtSoF3Yg7tG3DO5PGAzD2pZepXbMGC5cs\n45yz2gNw+mmN2ZGSSk5ODtHR0WHN7lc2J1U8RzYnPLT9PPC8qnYFDv1qzKHw17UOMAe4o1QSAp3b\ntWHmN/OAY7QAf3R4QQvw74ItwL+Yzav/OrIFeGJpRTw8b/PGzPxhJQCrN24hKaEisRWc5WlqV6nM\n2m3JZO8/4Px9NmyhfvWqfL1cGf/B5wB8PH8pXVo2KfwPL63MHdrx2VffOJn0J5KqViEutuA1vuW+\nR0jdmUbW3r189e08OrY/k/p1arNslVNMt2zbTmyFClagToA1By1C8N29e4F2QCKwKHioOfAdcCXw\nK/AJMB+YgjNiagdUASYB9wHvAe2BWcBIVZ15vO/boG7tvPXffXnCece+/CbfL19FVFQUQ+75B6t/\nXkd8XAznndORmXO+46W3pxMIQN9eV3B
pz25kZmXx4JPPsjtzDwcOHuDOG3rT9awTexPy1I7nwsED\nrP33Syec99n3PmPRTxuICgQY1OdS1mzcRnxMeXq2bc702Qv54NvFlImKok3jejzY60Ky9x+g/0vv\nkL5nL5ViK/DMrdcQH3PKCX/fRtfeB8CGpQtO+GufeWkKi5YsJxAVYOiD97L6p1+Ij43lvG5dmDn7\nGya89hYBAvS97mouu+A89mTtZcCTT5O6M42DOTnce2tfOrZre8LfF4DEWn+45qBWpIoQLFIXAOWB\nxsBonDmmFsC1OAVsLfA68CJwMdAT+CsQAAbgTJy/p6rtRKQR8DFwlqruPtb3/b1Fyg0nU6TccjJF\nylVWpIxXWJEqXVak/MPmpIwxnmZFyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjj\naXaDsZdF+ei6vbw82L/P7RQmAtlIyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjj\naVakjDGeZhdzRpiRE15h6WolEAgw8K4j2qzPnc/EqcE26z260Ocvf3barA97uqDNesP6DL7nH2HL\nO+rfn7Fs/WanzXqvC2jZoHb+sXdmf8/HC5c7bdbr1eKxqy9gR/puBr39EfsPHiQnN49Hrzqf5iG9\n+8LB2qyHlxWpCLJw6Uo2bN7K9AljnDbro8czfcIYINgCfPwUZkwJtll/ZDg9zzkbgPatWzB+uAtt\n1n/awMYdO/nnwzezdlsyg97+iH8+fDMAmXv38drn8/j08bspEx3FLeOnsmzdZj5bsppz25zONV3O\nZMnaTTz/0VdMufu6sGW2NuvhZ6d7EWT+4mX5hadR/bpk7D6yzXpsQZv1tq3cb7Ou6zm3tQDQqGY1\nMrKyydzr3FpzeJv13GCb9QpUjoshPdP5O2Vk7SUhLuaYf35psDbr4WcjqZMgIpVwWlVVAP4P6AcM\nwmmjngOsUtVbRWQBcK2qrhWROsCHqnpmSedJ3plG8yaN8rcTE4pos96mZUGb9YEj2JWxmztv7E3n\ndmeUdLRCpWTsoVm9kDbrcTGkZGQSV6E85cuW4Y5LunL+4PGcUrYsF7VrToPqVbixx9lc8/SrfLRg\nOZnZ+5j6wN/DkjU/s7VZDzsrUifnBmC1qt4rInfgtLCKBS5U1XQR+UZEWgJvA9cATwGXAf8MR7ij\nWoA/2p+Bo8c7bdZrVCcvL6TNevdz2LR1OzfeP5DPpk52p816iMy9+5jy6Vw+GX4XsaeUp++4t/hx\n83ZmL/+JC85sxm0XdWH2ip8YM+Nzxv/jatdyWpv10meneyenKfBt8PFHwc87gQ9F5Ovg8So4RenK\n4PE/U0pFKqlqIsk70/O3j2qz3qYF08aPYvLIIcTFxVC7RrDNeo8uBAIB6tWuSdXEyuxISS2NeEep\nVimOlIzQNuu7qVYpDoC125OpWzWBynExlCsTTdvG9Vj16zYWr91El2bOaLHT6Q1ZuXFrWLIeUtw2\n65PHjiQ+No7aNWuwePnKQtusm+KxInVyAkBu8HEeUA6YAFwTbL2+AEBVU4HNItIeiFLVLaURpnO7\nM5j5jVMzC22z/siwgjbr84Jt1j+fzavTj2yzXqXQP7/E8zZtxMzFqwFY/es2khLiiT3lUJv1BNZu\nTylos75xK/WrJVIvKZHlG5yXb8XGrdRPCk9L+PzM1mY97Ox07+SsxWmn/h5wERAPZKjqdhGpGzxW\nLvjct3EK2JTSCtO2RVOaN2lM77seJioQYMi9tzHj0y+Jj43hvC4d6XXJ+dz80FACAbj12quoXKki\n3Tt34MERY5n17QIOHDjI0P63h+1U74xGdWlerybXjnnNabPe+yI++G4p8RVOoWeb0+l7Xidueu4t\nykRH0aZhHdqdVp/6SYkMevtjPv3BKW4Drr4wLFkPaduqBc1Pb0Lvfnflt1mf8b9P89usX335JfTt\n/xA
BAtx6w7UkJlTimisuZcCTT9Pn9ns5mJPDsIfvC2tmv7MOxidBRKoCHwIHgM+BvsAcoDmwDFgN\n3Ay0wRl1bQcaqmp6oX9giAZ1a+etXzCrlJKXrFPP6gEH9rP2nXFuRym2Rjc5l1xYB2Pvs5HUyYkF\nHlfVz0SkI9BVVW864jnPAohId+Dj4hQoY0wBK1InZxdwv4gMwRkp3VPYk0RkOHAB8NcwZjMmIliR\nOgnBUdEFxXjeUGBo6ScyJvLYu3vGGE+zImWM8TQrUsYYT7MiZYzxNCtSxhhPsyJljPE0uwTBlIwy\nZYmq26To53mGc6dF3sH9Luc4MX+4y82xkZQxxuOsSBljPM2KlDHG06xIGWM8zYqUMcbTrEgZYzzN\nipQxxtOsSBljPM0u5owwfmuzPvLlqSz98Rcn7619aBnSN/DL+T8wcfp/nLxdzqbPpeezN3sfj42b\nTEp6Bvv3H+D23lfQvUN4+gTmZx4/iaWrfyQQgIH33E7LplKQec48Jr71TyfzuV3p89fLAfh45ixe\needfREdHc8/NN9Ct01lhzexnVqROgoikqGrVI/bdBOxS1Q9E5CpVfS9cefzWZn3hijVs2Lqd6WOH\nsXbTFgaMe5npY4cV5DvlZIQAACAASURBVJ30JjOeH0FCfBz9ho6hZ8czWbz6Z1o0bsgtV/2ZLTtS\n6DtoVFiL1MIly9mweQvTJ41j7YZfGTDqWaZPGleQedwEZrwywWmz/uAgenbpRPny5Xnx9am8/+qL\nZGXt5YXX3rYidQKsSJUwVX0DQETKAffjdJIJi2O1WY+LjTmszTqQ32a9do2kcMU7Ou+yVfQ822nk\n3KhubTL27CEzK4u4mBjSMnYTHxtDYqWKTt7WzZm3dBVX9vxT/tdvT06lRtXwtrSa/8MSenbp5GRu\nUI+M3U6b9bjY2OBr7LRZB+h4ZhvmLVrCKeXL0andGcTFxBAXE8MTD/cPa2a/syJVBBH5Eaf7SwBI\nA7qr6iIR+QyoIiKPA+cDqcClwBAgBacxaEsReQmn7foUoCFQFhiiqiXeCsZvbdaT03bRvPGpBXkr\nxpOctou4GKc47dmbzYYt26ldvSoLVqymQ8um+c/t/eBwfkvdyaQhD4Qla37mnWk0l4JT6MSESiSn\npjlt1g+9xpu2ULtmdRYsWUaHM1oBsDd7H7c/OpSM3bu56+/X0zFMr3EksCJVtB+AFjj98xYBHUVk\nMVAd5y7V91R1iIh8B7QK+boxwFmqeoeIXA9sU9Wbg22wZh3x3FLhtzbroc3VAoHA/7N35nFVVfv/\nfg4qJqAoIGJYaaWrxAnHKzhl2mB5K9Oy8t4Gm8ix0sqxUsshZ1HMLLuVFjev1a97b2WlVk44omn1\nqUzLIRUVxQnN9PfH2sAREfV+4bDP8fO8XryEs/fZ571X8WbtxeY8jHryMQZNmmHzVql82vm8N/Z5\nvv/lV/qPS+GjKS/j8ZTMn956G+E8Hg+jBvZj0KhxhIWGUq1qTO72/VlZJL/0PDt27eL+3s+wYO7b\nJZbZ39Df7p2br4C/AInAFKAZUBdYgxWBrnf22w6En+UYCcDtxphF2Mu/cs7lYJHib5r16IiKZGR6\n582ksnOpBNC07rXMHjOUV5/vR1hICLHRldnw82Z+z7D5rr3yCv7880/2HcjySV6A6KhIMvZl5mXe\ns5fKXpecTePrMXvqeF4dM5yw0FBiY6oQWaki8XVqU7p0KS6PvZTQkHLs23/AZ5n9HS2pc7MIW1J/\nwQpAw7GFtRA4kW/fs/1oPA68JCJtnI+aIlLk7xHid5r1hnWZv2SlzfvzZqIjKxEWUi4v7/Nj2Lv/\nAEeys1m0Yi0JDeqwasMPzPrgvwDsyTzAkexjVKpQ3id5ARKbNGT+om9sZvnJata9x7jfIK8xXk5C\n43haNG1E2pp0Tp48SeaBLI4czaaSs9amnBu93DsHIvKjo0z/Q0QOG
mN2ArdjbcWFcZK88U0DbgPe\nNcZEA31FZGBRZ/U3zXrDa2sRd3V1uvZ7kaAgD0Mfv595X3xN+ZBytE9oQpcbr6P7kNF4PB4e7dKR\nSuHl6Xrz9Qya/Br3PTOM7ON/MOTx+wkK8t3P2oZ144gzNema1JcgTxBDn+rBvP/Op3xYKO1bJdKl\n4810f2qAzdytK5WcX1Tc0KYldz/eB4DBfZ/waWZ/RzXr54ExZg720u5xY8zDwDMiUsv7FgRjzFwg\nGWiDXTh/Fata3wjcA0wHagOlgBdE5JPCXtPvNOunTrF54UclHeW8qXH9HQBsXrW4hJNcGJ7o6hfd\nQpaWlEvRkipetKT8B51zKoriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRXoyWlKIqr0ZJS\nFMXV6J/FKEXDnyc4lbGtpFNcAPaeSE8p/RZwOzqTUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0pRVFc\njZaUoiiuRktKURRXoyWlKIqr0TvZAgy/06y/+zHrNm3FAwy8tyN1r7wsd9vsL5fx8bK1lAryEFe9\nGgPv7ciRY8cZMPOf7D1wiHJlg3n54S5UDvediAHg5UnTWLfhOzweDwP79qBe7Wtyt33x9RJS3pxN\ncHAZbml3Hd063877H/+X//fpF7n7bPhBWPvlf3ya2Z/xm5IyxrQBeopI55LO8n/FGLMFqCMih4ry\nuH6nWf/hF37dtZf3Bj/Bph27GfTGXN4b/AQAh45m88YnX/HZ6P6ULlWK7mNfJ33Tb6T//CuXVY5k\nUo9urPpxM1M++JxhD3TyXea16/h16zZSX0tm05ZfGfjSK6S+lgw4Yzx+Ch/Mmm41608NoF2rRLp0\n7ECXjh1yn//Jl4t8ljcQ0Mu9AOJsmnXgNM16UFBQrma9JFn+/c9cH18bgKsujSbr8FEOHc0GoEzp\nUpQpXZojx45z4s8/yT5+nPDQcvy6ay/1rqwGQONaNVjz0xafZl62ag3tWiXazNWv4MDBQxw6fBiA\nzP0HqOBo1oOCgvhL43iWrlx92vOnvvE2Tzz4N59m9nf8ZiblEGaMeQeoD7wPfABMxeqjDgL3Y83A\nfbBOvIbAS8BNQDzQX0Q+NMbcBTzl7LNaRPoYY+KBacAx5+Nu4E9gFlAJO1a9gG1YF1+C89hioAWQ\njjM7MsaMBTYA84A5QCgQAvQSkRXFNTj+plnfc+AQcVfE5uUtH0rGgYOElbuEsmXK0OO267nhmTGU\nLVOGDs3qUSOmMrWqxfDVeuGGxnVZ8cMv7NiTWcgrFEPmvZnEmVp5mSuFk7F3n9WsV6rI4SNH2LJ1\nG7FVY0hbk07T+Pq5+67/7geqVqlM5ciIgg6tnAV/K6nawDXYGeBmrD6qv4ikGWP6YctpIdDA2a8V\nMBuogZV79jLGfAG8DDRwCuVjY8x1wB3ANBF52xjTFogBugCfishMY0xtYJKItDfGjAeeA8oBL4vI\nfmNMQXljgJlOMbYFngXuLIZxKRD/06zn5T10NJtX/72QT0b2I7RcWR4c8xo//LaDO1s1Rrb9zn0v\np9DEXElEhTCf5/TmDM36kGcZ+PIrlA8NpVrVqqe54+d+/F/u6HCj70P6Of52ubdGRI44azkeoLaI\npDnbFmJnSwDrROQY8Dvwo4gcBnZh7cO1gJ+81oMWOc/7CBhijBkO7BaRH7CzpccdPfo08jTq/wCa\nANeKyJxC8u4C7jTGLAZGA8WqBvY7zXrF8uw5kLcst3v/QaIds++mHbu5rHIElcqHEly6NI1q1mDj\nlu0Ely7NC3+/g9kDk3jkljaUK1vktvrCM0dFsmffvrzMe/ZSOTLvP2vT+PrMSZnEq2NfpnxYKLFV\nq+RuS1u7jvi6cT7NGwj4W0nl15p7E4y97Mu/n/fnHuzPNm93WTBwUkS+xBbPD8A/nNnVcewlWo4e\nvanznNLYy7dwY0zOlMNbYJjzW
F9gu4i0AJLO5wT/L/idZj2uFp+t+tbm3bKd6IrlCS1XFoDYqEr8\n8nsG2cf/AGDDlm1cERPFV+t+YNK8+QB8vGwtreoWOIMtvszNGvPZwq9tZvnRatZD88b44aeeY+++\nTI4cPcrCxcto3qQRALsy9hBarlyJzFD9HX+73MvPBmNMcxFZBrQGVp3Hc34EahpjyovIQed5I4wx\nPYH/iMhsY4wHO7tKwyrVlzmXezeJyHjgaSAVe7n3FHaWlAVUNcb8gr20XAtEAeud170DW4jFhr9p\n1uNrXkFc9VjuGTGNoCAPQ7rdxgeLVxFW7hLaN6rDQze14v7RMyhdKogGV19B41o1yD7+B3MWLOPu\n4VMJDw1h3OP3+CRrDlazXouuj/bCExTE80/3Zt5/PqV8WBjtW7fgrr/ewkNPPosHD4/+/R4iHM16\nxt59RFSq6NOsgYLfGIzz34JgjNmDXXOaip3FZAIPYhfLe4pIZ2NMHSBZRNrk+7wTtmhOAotFZIAx\n5iZgBHAAu3D+IHAEeBOIxurRewMZ2EX7BOxMdAVwG3CDc0wB9gJfYxXrbwFbsQr2icBw4HnOcQuC\n3xmMT/zBL/96taSjnDdXdu0NwJa1y0o4yQUSWe2iMxj7TUldbGhJFS9aUv6Dv61JKYpykaElpSiK\nq9GSUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRX4+9/uxfAeDj976DdjAdK\nB+OpUaekg5w/p+zfop86erCEg1wY/vJ/RFGiMylFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVo\nSSmK4mq0pBRFcTVaUoqiuBq9mTPAGDn1NdK/EzweD4N6PkLda/JEll8uXk7KO6kElylDh7at6HbH\nraSlf0vfF0ZxdfXLAah1ZXWG9H7Md3mTXyP9ux9s3l6Pnpn37Zy8LenWqSNpa9efmbfP4z7LCzBy\n2uukf/cjHg8M6vEwda+pmZd5SRops9+3ma9rQbfbb8ndln3sGB279yap2110uul6n2b2Z7SkAogV\n6d+yZdsOUqeOZdOvWxk4ZhKpU8cCcPLkSYZPfpV5MyZSsUJ5Hnn2hVwle5P6dZj84oCSyzttnM07\neiKp08bl5Z00nXmvTXLyPk+7Fs3z8g4b6PO8ACvWbWDLtt9JTR5tM7+STGry6LzMU2Ywb/p4m3nA\nMNolNiOmchQAKe+8T3iF8iWS25/Ry70AYvmadbnFc9UVl5F18BCHDh8BIPNAFuXDQomoGE5QUBDN\nG9Zn6er0koxbQN7Drs4LsHzNetolNgOczIcKGeP4eixdvQ6AX37bxqZft9K6WaMSy+6vBORMyhhT\nCpgBXIkVdQ51PnqKyAbHsReFtRf3A8KwOqqrsB69E8BqEeljjHkBqAZcDlTFat0/9dJinQBWicjT\nxpgKwBwgFCsP7SUiK4wxPwOvAh2BskA7x/lXpGTs209cratzv46oGE7GvkzCQkOIqBjO4SNH2bJt\nB7Ex0aSlr6dpg7rExlRh069bSRo0nANZB+lx/z0kNo4v5FWKMm9mvrwVCsi7ndiYKqSt/dbJG23z\nDhzGgaxD9HjAd3kBMjIziat1VV7m8IIy54zxBprWt3/POHr6LIb0epQP5vuHAchNBGRJAfcCv4tI\nd2NMFLAA2HeWfeti1etlsMLPBiJyyBjzsWMxBogVkRuMMXWBtxxt+mCguYgcM8b80xiTiHXyzRSR\nD40xbYFngTux4/yDiLxijHkPuB74sHhOPQ9vXZnH42HUc30ZNGYSYaEhVIupwqlTp6geeyk9/n4P\nN1/Xgq07dnL/UwP57J0ZJWLa9bareTweRg14kkGjJxEWGkq1qk7eapfS4/57uPm6ljbvkwP5bHbJ\n5IXTtdUej4dRz/Zh0CtTnMzRnOIUH85fSIPahmpeynXl/AnUkkoAWhpjWjhfl+Ps9uB1TtHEAT9
5\nCTsXYS3GAF8CiMi3xphYIA47s/rMGAMQDlwBbACGGGP6YWdMh71e5xvn323O/kVOdFQEGfsyc7/e\nvXcflSMr5X7dtEFdZk+26yfjXvsHsTFVqFI5kg5tWwJweWxVoiIqsXvPXqpVjSmOiKfnjcyfd++Z\neaeMsXlnvElsTDRVKkfRoW0rr7wVfZY3L/N+r8z7qBwZkZe5fh1mTxppM898m9gq0XyxZDlbf9/F\nouWr2Jmxl+AypYmpHEVCo/o+yezvBOqa1HHgJRFp43zUxFqJcyiTb1+wPxS93wkjGGs4hjPH6Tj2\ncjDn+PEiMgfoC2wXkRZAUr7nnPD6vFjecSOxcTzzv14KwMYffyY6MoKwkJDc7Y88+zx7M/dz5Gg2\ni5auIKFRfT7+fBGvp84D7OXX3sz9REdFFke8M/M2acj8r5Z45Y08Pe8z+fM24OPPF/L6e07evb7N\nC/nHeJMzxuXyMj83LC/zspUkNKrPhCH9mTttLKnJY+jcoR1J3e7SgroAAnUmlYZVn79rjInGlkcW\ndk1pA5Do/OvNj0BNY0x5Z72oNVa73g5oAYwxxtQDfsWq1K81xkSLyG5jzIvYNbAoYL1zvDs4++yt\nWGhY51rial1F1579CfJ4GNoniXmffkH50FDat2xOl1tupHv/oXg8Hh69twuVwsO5LrEp/UaMZcGS\nNP744wTP933CZ5dONu/VdO3RjyBPEEP7Ps68T76gfFgI7Vsm0OXWG+neb4jNe18XKlUM57rEZvQb\n/goLliznjxMneP5J3+UFaBh3jR3jXs8SFBTE0N6PMu/TLykfFkr7Fn+hyy3t6f7sCzbzPXdSKbyC\nz7IFKgGpWTfGlAamA7WBUsAL2NnQBOAnYBN2jWoRdjG9s/O8nMXwk8BiERngLJxfDVQAagB9ReRL\nZ9+B2BnaWqAX0Bh4C9gKJAMTgeHA80AdZ61rLLBBRN4s7ByqX1bNvzTrwOYVC0s4yflTo2kbADYv\n+6Jkg1wgnmrXXnTvexeQJVWUOCW1R0SSffm6WlLFi5aU/xCoa1KKogQIgbomVWSIyAslnUFRLmZ0\nJqUoiqvRklIUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI3egqAUEafgj2Pn3s01OPdElvbpXy4p\n/wM6k1IUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6MlpSiKq9GbOQMMv9Os\np8wi/fsfbd4nHqKuyfPwfbl0BSmz59q8bRLpdnuH3G3Zx47R8ZEnSbqvM51ubOuzvAAjk2eQvjFH\nDf8Yda/1HuNlpLz1HsHBZejQtjXdOnUE4OPPFzLz3bmUKlWK3g91o03zpj7N7M9oSRWAMeYmoIaI\npJxjv1uBzsBzwIsi4rvv7gLwO836uo1s2f47qZNHsunXbQwcN5XUySPz8ibPZN60V2zegSMcZbk1\nw6TMnkt4+TDfZ07/li3btpOaMp5NW36zaviU8XmZJ6Ywb+YUm/mZobRr0ZyyZYNJfnM2/3ptMkeO\nZjPljXe0pC4ALakCEJFPL3D/nUCJFhScXbMeFhpymgIcyNWWx8aUnLBy+dr1tEto6uStlqsst3kP\nUj7UK298PZauWUenG9taZflv20pEWb58dTrtWjS3matfni9zQWO8lkvKBpPQKJ6wkBDCQkIY3r+3\nz3P7M1pSBWCMeQC4FaiMNcvUB9aKyMM5FmOsbWaTs391YK6INDbG3Ic1x/wJbBSRR53jtXCOZ4BX\nROT1os7td5r1zP35lOXhZGTud/JW4PBRr7zrNtC0XhwAo1/9B0N6PswHny/ySc7TMu/LJM7rkjQi\nPJyMffvyxviotxp+PU3j6wFw9Ngxkga8SNbBQ/R88D6aN2rg8+z+ipZU4TQC7gZ2A9uMMRWBIcAL\nIvKRMaagy8FQ4CYR2W+M+dopNbA69wSgJvAeUOQllR//06z
ny9u/F4PGTXPyOsryzxe5Sll+inyZ\nBzzNoFETCQsLoVrVmNxz2n8gi+QRQ9ixazf3932OBf98E4/nohO//E9oSRXOz86lHMaYHVg9em1g\nqbN9EXBzvufsAz5y9OvXAjl63WUi8qcxRjXrOXkLUpZHeOWtH8fsCSNs3tffcZTlaXnK8j17CS5T\nhpjKkSQ09I0R+Iwx3pNPs96gLrOTX7GZZ8wiNqYK2ceOEV/nWkqXLsXlsVUJDSnHvv0HiKxU0SeZ\n/R29BaFwTuT72uN8FKhfN8YEA1OBu0WkNdakXNCxVLMOJDaqz/xvltm8P/1yprJ84Aj2Zh6weZev\nIqFhPSYMfpq5U8eQOmUUnW9uR9J9nX1WUOCo4Rcttpl//JnoqHxj3H/IGWr4Fk0akrZmHSdPniTz\nQBZHjh5Vs/EFoDOpC0ewpuLPgOvybSsPnBCRncaYy5z9fPaGRX6nWY+7hriaV9K1z0Cbt9cjzPts\ngc3bohldbm5H9+eG2bxdO7niG7thndrEmZp0feJpgoI8DO37BPM++dxmbpVAl4430b3fYDzAo/fd\nRSVnEf2GNi24O+kpAAb3SSIoSOcH54sajAvAa+G8uog0dh5bhb3dIAKYBWwHfgHCsBr3nIXzN4E4\nYB3wHdAdq1s3ItLPGBOG1axXLyyD/xmMT7F5yWclHeW8qZF4EwCbV35VwkkuDE/MVRfdQpaWlEvR\nkipetKT8B51zKoriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRXoyWlKIqr0ZJSFMXV6J/F\nuBm/+it5D5QNLekQ588p588vsw+VbA7lnOhMSlEUV6MlpSiKq9GSUhTF1WhJKYriarSkFEVxNVpS\niqK4Gi0pRVFcjZaUoiiuRm/mDDBGJr9G+nc5CvBHz9Ssv52jWW9Jt04dSVu7/kzNep/HfZd3yquk\nf/c9HjwM6v04da81eXm/WUbK23OcvG3odudfAfh4/gJmvvu+VZZ3/xttmjfzWV6AkdPfJP2Hn2zm\npAfyqeFXkvLuPILLlKZD60S63XZT7rbsY8fp+NjTJN17J51uaOPTzP5MQJfU+erSA4Vczfq0cVaz\nPnoiqdPGAY4CfNJ05r02ydGsP59r4m1Svw6Thw0sgbzrHWX5REdZPp7UlIl5eSdOZd7ryVSsUIFH\n+g+mXcvmlC1b1irLZ07hyJGjTJn1jk9LasX679iyfSepE19i02/bGDg+hdSJL+VlnvoG86aOpmKF\nMB4ZPJJ2CU3y1PBz/lUianh/J6BL6kJ16f7OmZr1w+ehWY8uubyr02nX0ktZfvAQhw4fJiw01Mkb\nRkRF66Zr3ijeKsuDy5LQ2FtZ3se3mdd+S7uEJjbz5dVOH+Osg5QPs/ZlgOYN6rB07bd0uqENv/y2\n3arhm/rGDh1IBHRJOdaXOti1t6bAJcB0EZnpWF0OAdcAUcCDIrLWGDP+LPvuwBqNLwfuE5E1xpge\nwL1YD9+HIjLOGBMPTAOOOR93Y5Xrs4BK2DHvJSLri/p8M/Zl5tOsVyhAs56jAP/W0axHW836wGEc\nyDpEjwd8qFnfl0lcrZpeeXO08KGOsvwIW7ZuJ7ZqFdLWrqNpA0dZnp1N0nPPk3XoED0f7EbzRr77\nxs/I3E9czSvzModXyFPDh1fg8NFstmz/ndgqlUlbt5Gm9WoDMPq1txjSo3uJqOH9nYAuKS+2iMhT\nxphywCZgpvN4aRFpZ4zpCAw1xtxTyL5lReRGY8zjwN+NMZlYxVULZ/sSY8z7wIPANBF52xjTFogB\nugCfOoVXG5gEtC/uk/YWAVkF+JMMGj2JsNBQqlV1NOvVLqXH/fdw83UtrWb9yYF8NtslmvWB/Rg0\neryT10tZnnWQ5BFD2bFrF/f3eZYF779VYsryMzTr/XowaFyKlxoePvz8KxpcW4tqJThr9WculpKK\nMMYsBY4Dlb0e/8L5dxk
wWkSyjTFn2/cb599tQDPsbKsmsNB5vDxQHfgISDHG1AJSReQHY0wCUNkY\n083ZN095W4RYbbm3Zn3vmZr1KWMAGDfjTWJjoqlSOYoObVsBOZr1ij7WrO/Ly3uGsrwes5Ptmtq4\nV98gtmoVso8d91KWX+pzZXl0ZCUyMr3V8Jmnq+Hr1Wb2+GE28xtziK1SmS+WrGDrzt0sSluTp4aP\niiChYT2fZPZ3LoZbEBoBbYHWItIGewmWQ875e4BTxpjWheybX5N+HPiPiLRxPuqKyNci8iXQBPgB\n+Icx5jpn315e+zYt+tN0FOBfLQFyNOuRpyvAn8mvWW/Ax58v5PX3HM36Xh9r1ps0Yv5XjrJcfipA\nWT7YK28aCY3iC1CWZ/vUbJzYsD7zv1luM//0C9GRlU5Xww96mb37D3AkO5tFy1eTEF+XCYOeZO6U\nkaROeonON7Ul6d47taAugIthJlUdWCoifxhj/gqUMsbkqM9bAv8EmmNtw1HA1rPsm5/VwGhjTAhw\nFGspfg5rLP6PiMw2xniAeCANuB1Y5lzu3SQi44v6RK1m/Wq69uhHkCeIoX0fZ94nX1A+LIT2LRPo\ncuuNdO83xGrL7+tCpYrhXJfYjH7DX2HBkuX8ceIEzz/pQ8163drE1apJ16QnrbL8yZ7M+2S+oyxP\npMutN9H96YFO3rvzlOWtW3D3430B3yvLG8YZq4bvO9hm7tGdefMXUT40hPaJTely8/V0HzDCUcPf\n7go1vL8T0AZjZ+G8PpCILZIPgQQgCygF/AFUBS4DugG/AZ+fZd+5IvJvY8ytQGcRecAY8wTwEHZh\n/EMRGenc9jACOICdiT0IHAHeBKKdY/UWkVWFZa9+WbVTm1csLGwX11Cj6XUAbF71zTn2dA81GiUC\nsHnxJyWc5MLwVK/vT++EWCQE+kwqGDiW7/JqAoDzG7uPROTf+Z5zxr7eOPv/2/l8GvY3ed7bPwUK\nuvXhzgsNryhKAK9JGWOaA8+StziuKIofErAzKRFZBlxVyPYHfJdGUZT/lYCdSSmKEhhoSSmK4mq0\npBRFcTVaUoqiuBotKUVRXI2WlKIoriZgb0FQfM0pOHa4pEOcPx7n53O58iWbQzknOpNSFMXVaEkp\niuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRmzkDDL/TrKfMIv37H23eJx7K\npyxfQcrsuTZvm0S63d4hd1v2sWN0fORJku7rTKcb2/osL+So4X9w1PCPFaCGf9cZ49b51PBzvdTw\nxeLiCEguqpIyxrQBeopIZ6/HJgKTRGRzMb3mRyJyW3EcOz9+p1lft5Et238ndfJINv26jYHjppI6\neWRe3uSZzJv2is07cATtEpvlKctnzy0RZblVw+8gNWWCo4afQGrKhLzME6cx7/Upjhp+iJcafg7/\nmjmZI0eymTLrbS2pC+Civ9wTkb7FVVDO8X1SUHB2zTpwmmY9KCgoV7Nekixfu552Cfab9aorqpF1\n6JBX3oOUD/XKG1+PpWvWAfDLb9ussrxZI99nPosa3mbOU8MHBQXRvFEDlq5OZ9mqtSQ0bkBYSAjR\nURE+V8P7OxfVTMohzBjzDtYi8z7Ws9cTKMOZevS+QDWsWr0q0F9EPjXGPI21FwcB/xWRF40xLwDh\ngMG+bXFfEfnEGLNHRKK89OsnsYqt/kV9Yn6nWc/cT1ytvHd4jggPz1OWV6zA4aNH2bJtB7Ex0aSt\n20DTenEAjH71Hwzp+XCJKMsvTA2/nqYN6gJwNPsYSc+94Kjh7/OpGt7fuRhLqjZwDbZgNgMbnccL\n0qMDxIrIDcaY0qfFKAAAIABJREFUusBb5JlgWmAL5xdjTI5V5jIR6eBorR4HvH1Jk4HHRGS9MeYt\nY8wVIvJrcZ0kBIBmvX8vBo2b5qUsP8WHny+iQW1DtapVfJ6vIM5Uwz/NoNETvMbYbtufl
eWlhn+O\nBe//o8TU8P7GxVhSa0TkCIAj78yhID06wJcAIvKtMSbW2fcI8BXWahwF5LjBFzv/bsPOqrwxIrLe\nOdbfi/aULP6pWfdWlu87XVleP47ZE0bYvK+/Q2yVaL5YksbW33exaPmqPGV55UgSGtYv9rw2c+Tp\nY1ygGn6szfzqLGKrRjtq+Nolpob3dy7GNakTBT14Fj065BsjY8wVwFNYC3EbwHs2lF/F7s3J/0Pm\n88LvNOuN6jP/m2U270+/EB0ZcbqyfOAI9mYesHmXryKhYT0mDH6auVPHkDplFJ1vbkfSfZ19VlCQ\nM8Y5avifC1DDD3GdGt7fuRhnUgVijOnJmXp0sJd1Y4wx9bCFFAXsFpFDxpiGwBVYCem5+M4Y00xE\n0owxrwNjReT7ojwHv9Osx11jleV9BhLk8TC01yPM+2yB1ay3aEaXm9vR/blhjrK8kyu+sa0a/mq6\nJj3lqOF7MO+Tz61mPVcNPwiPBx697658avgnAd+r4f2dgNas5yf/LQjGmD3ABuzCeTXO1KMnAVcD\nFYAa2IX0RcB/gTDs5V0poIHz+R4RSTbG1AGSRaSN18J5XSDFibJcRPoVltX/NOun2Lzks5KOct7U\nSLwJgM2rvi7hJBeGp8qVF91C1kVVUheK8xu7PSKS7OvX1pIqXrSk/AedcyqK4mp0TaoQROSFks6g\nKBc7OpNSFMXVaEkpiuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5Gb0FQiggPBF9S0iEuAOcm5uPZ\nJRtDOSc6k1IUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6MlpSiKq9GbOQMM\nv9OsJ88gfaPg8cCgXo9R91rvvMtIeSuV4OAydGjbim6dOgLw8ecLmfnuvyhVKojeD3XzuQ145LQ3\nSP9eHDV8d+pek+fh+3JJmqOGL02H61qeqYZ/uA9J3e7yuRren9GS+j9gjBmLfY/0nUANEUk5x1OK\nFb/TrOfkTRnnKMsnkZrilXfidObNnGzzPmPzli0bbJXlr03iyNFsprzxjk9LasW6DWzZvoPUKaPt\nGI9NJnXK6LzMya8xL2WczTxgOO0SmxJTOQqAlNnvE16+vM+yBgpaUkWAiHx67r2Kn7Np1sNCQ07T\nrAO5mvXYmOiSy7s6Pbcor6p+ea5m/ex513JJ2bIkNLLK8rCQEIb37+3bzGvX0y6xmc18xWVkHco3\nxqHemeuxdM16Ot3Y1qrhfy0ZNby/E9AlZYwpA/wDq53KBh4CpgKhQAjQS0RWGGN+Bl4FOgJlgXZY\nb96cAvbtBjyLFYAeBTYYYx4A6ohIP2PMeKApcAkwXURmGmPeBHYAjbDK9vtEZE1Rn6/fadb3ZRJn\nvPKGh5+e96h33vU0jXeU5ceOkTTgRbIOHqbng/fSvFEDn+S1mfcTV9NbDV+BjMz8mR01fPq3NK1f\nB4DR099kSK9H+GC+f8g13ERAlxRwP7BTRO41xnQFbgdmisiHjkr9WeBO7Dj8ICKvGGPeA64Hvsu/\nrzGmM/Ay0BjIBFZ7v5gx5hJgi4g8ZYwpB2wCZjqby4rIjcaYx4G/A0VeUvnxO806+ZTlA55i0KhJ\nhIWF5OYF2H/gIMkjBrNj127u7zuABf+cVWLK8jM068/0ZtDYZEcNbzN/OH+hq9Tw/kagl1RD8jTp\n7xljwoFkY0w/7IzpsNe+3zj/5ijSdwFD8u0bCRwUkd0Axpgl3i8mItnGmAhjzFLgOFD5LMdvVnSn\nmIffadajIk/XrO8pIG+yd94qjrL8WkdZXtXnyvLoyAgyMr3V8JlUjvDSrNevw+yJL9vMM98mNiaa\nLxbnV8OXJiYqkoRGvjMv+zOBfgvCn5x+jn2B7SLSAiv+9Ca/Ir2gfT2crkvPr2BvDbQFWjsK9mOF\nHL/I8TvNepN45i9anJc3Kl/e/kPPyNuiSXw+ZflRn
5qNExs3YP7XOWr4TURHVjpdDT9gWF7m5StJ\naFifCUP6MXfaK6Qmj7Zq+G53aUFdAIE+k1qJLY33jTG3AoOBJ5xtd1C4Hj0KWJ9v371AuDGmInZm\nlQgsy/ecrSLyhzHmr0ApY8z5KNiLBL/TrNepTZy5mq5PPE1QUBBD+yY5yvJQ2rdKoEvHG+nebzAe\n8vIC3NCmBXcnPQ3A4D6P+1RZnquG7/2cVcP3ftRRw4fQvsVf6NKhPd2fe9FmvudOV6jh/Z2ANhg7\nBTETu3D+BzAcmA5sBZKBic5jz2MXvg953VawEXirgH09QB9gC3bhPOc3e3Wc7Z87j38IJABZWBX7\nXBH5t1OWnUXkgcKy+5/BGDavXFSyQS6AGk1aA7B56eclnOTC8FxW+6IzGAd0SfkzWlLFi5aU/xDo\na1KKovg5WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVoSSmK4moC/c9i/JsS+sv+\n/4lTJyH78Ln3cws5Y1s2pPD9lBJHZ1KKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVo\nSSmK4mq0pBRFcTVaUoqiuBq94zzAGJk8g/SNgscDg3o9Rt1ra+Vu+3LxMlLeSiU4uAwd2raiW6eO\nAHz8+UJmvvsvSpUKovdD3XyqLR85/U3Sf/gJDx4GJT1AXS9Z6JdLV5Ly7jyCy5SmQ+tEut12E0ez\njzFg7FT27D/A8eN/kHTvnVz3F99agUdOnk76dz/YMe6dRN1rTV7mb5aS8ta7BJcpQ4frW9PtztsA\n+Hj+AmbO+SelSpWid/e/0yahWKxmAYmW1FkwxtyLFTQ8LCLfeD0+EZgkIptLLNxZWJH+LVu27SA1\nZRybtvzGwNGTSE0ZB8DJkycZPnE682ZOpmKF8jzyzPO0a9GcsmWDSX5zDv96bRJHjmYz5Y13fFZS\nK9Z/x5btO0md+BKbftvGwPEppE58KS/v1DeYN3U0FSuE8cjgkbRLaMKajUKdWlfx8F23sX1XBg8N\nGOHTklqxdj1btm0ndfpEO8ajxpM6fWJe5olTmTdzKhXDK/BIv8G0a5lA2bJlSZ71Dv96PZkjR44y\n5Y23taQuAC2ps9MOeNa7oABEpG8J5Tkny1en065FcwCuqn45WYcOcejwEcJCQ8g8kEX5sFAiHC1U\n84b1Wbp6LZeULUtCowaEhYQQFhLC8P69fZd37be0S2hi815ejayDh/PyZh2kfFgIERWtEqp5gzos\nXfstnW5ok/v8nRl7iYmKKOjQxZd59VratUywmatfTtbBgxw6fJiw0FBnjMOIcESlzRs1YOmqtVxS\nNpiExvF5Y/yMa/8XciVaUoAxpgIwBwgFQoB5QAegiTEmE6vFWgPMB/4G9MSaiGcDFYADQFegIvC2\nc9gywP0isskY8zNWcZUI7AduERFvyWiRkLEvkzivy6WI8HAy9mUSFhpCRMVwDh89ypZt24mNqULa\n2vU0ja8LwNFjx0ga8CJZBw/T88F7ad6oQVFHKzhv5n7ial7plbcCGZn7bd7wChw+ms2W7b8TW6Uy\naes20rRe7dx9u/YdzK49e5k+7DmfZM3NvC+TOFMzL3PFcDL2ZhIWan8AHD5ylC1btxNbtQppa9fR\nNL4eAEezj5H03PNkHTxIzwf/RvPG8T7N7c9oSVligJki8qExpi3QA+vTmysiXxljrgRuF5GNxpi/\nOc/pB3wmIpONMU9iZ17bgWEistAY8xBWRPo0cCXwloj0M8YsB+oB6cV9UqfI05V5PB5GDXiKQaMm\nERYWQrWqVcjRme0/cJDkEYPZsWs39/cdwIJ/zsJTAu/AcEbefj0YNC6FsNAQqsVE4y1fe2/iCL7f\ntIX+Y6bwUcorJZIXwNsI5/F4GDWwH4NGjSMsNJRqVWNyt+/PyiL5pefZsWsX9/d+hgVz3y6xzP6G\nlpRlFzDEGNMPK
Iu1Ex/02n5YRDbme05DYAiAiEwAMMZcBkw2xrwIVAJWO/tmiUiODXkbEF4cJxEd\nFUnGvv25X+/es5fKkZVyv27aoC6zk8cAMG7Gm8TGVCH72HHi61xL6dKluDy2KqEh5di3/wCRziVL\ncRIdWYmMTK+8ezOpHOGVt15tZo8fZvO+MYfYKpXZ8NMvRIZXoGp0FNdeVZ0///yTfQeyiKxYLEN6\nZuaoSDL2ZeZl3rOXyl6XnE3j6zF76nibefobzhgfI75ObWeML/XpGAcCeguCpS+wXURaAEkFbD9e\nwGN/cub4DcPOrloBL3o9fiLffsXyIzSxSTzzFy0GYOOPPxMdFUlYSN77JT3Sfyh7M/dz5Gg2i5au\nIKFRA1o0iSdtzTpOnjxJ5oEsjhw96jM1eGLD+sz/ZrnN+9MvREdWIiykXF7eQS+zd/8BjmRns2j5\nahLi67Lq2++Y9a9/A7DHOZdKFcr7JC9AYpOGzF9klyk3yk9njnG/QV5jvJyExvG0aNqItDXpXmOc\nrfr1C0BnUpYoIGemcwcQfB7PWQm0BVYaYx4Dsp3jbDLGeIDbsHp1n9GwTm3izNV0feJpgoKCGNo3\niXmffE750FDat0qgS8cb6d5vMB48PHpfFyo5s48b2rTg7qSnARjc53GCgnzzs6thnCGu5pV07TuY\noCAPQ3t0Z978RZQPDaF9YlO63Hw93QeMwOPx8GjX26kUXoGut9zAoPEp3PfUULKPH2dIz+4+ywvQ\nsG4ccaYmXZP6EuQJYuhTPZj33/mUDwulfatEunS8me5PDbCZu3X1GuOW3P14HwAG933Cp5n9HdWs\nA8aYJsBbwFYgGZiILZgeIvJvY8weEYly9l2EXTjf6jwnHHtpeC/QGhgLbAGmADOAB4E5Xs+fCySL\nyKLCMlW/rNopf9GW12jSBk6dZPPiT0o6ynlTo9UtAGxetbiEk1wYnujqF91ClpaUS9GSKl60pPwH\nnXMqiuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdXon8UoRUOp0ngq\nX17SKc6fo4cBOPn5uyUc5MIodd+Ako7gc3QmpSiKq9GSUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0p\nRVFcjZaUoiiuRktKURRXozdzBhj+pln35sefN/HEU8/wwL1d6da1y2nblqatYHzydEoFBdGqRQI9\nHnmoRDICjPpsBeu2ZeDxwIAbm1E3Nip325yV3/Px+l8oFeQh7tJIBtyYZyrec+got077gMl3XUfT\n6lVLIrpfojOpQjDGvGmMubWIj9nTGPNCUR4zB2/N+kvP9GHE5Fdzt+Vo1meMeZF3Jo9m4dIV7Ny9\nh8wDWSS/OYfZyWOYPuoFvly8vDiinZMjR48yfMw4mjdpXOD2EWPGM+WVkbw7awZLlqXx8y8lY7lf\nuWUnv+7L4t3utzC8YyIvf5qWu+3QseO8sXQDbz94M+882IFNGQdYt2137vaxX6yiWiXfmW0CBS2p\nAOJsmnXgNM16UFBQrmZ92er0XM16dGSETzXr3gSXKcNrk8cTXTnqjG1bt20nPLwCVWOqEBQUROsW\nCSxbsbIEUsLyzb9zvbF//nNV5YpkZR/j0DFrPCtTKogypUpx5PgfnDh5kuw/ThBermzu80KDy1Ar\nWl17F0rAXe4ZYx4AbsLqz6sBE7DevF5YV95GEXnU2e9m4FKsIv1poClwCTBdRGZ6HTMNuNdRplcD\nPsKq1+92dqmJtcyMwRpirsRq1oeKyAJjzPVYA81O4Hfgl+I4d3/TrHtTunRpSpcu+H/HjL17iaiU\nJw2NiKjE1q3bfRXtNPYcPkrtSyNzv64Ucgl7Dh0lrGwwZUuX5onW9blh8r+4pExpbo6rQfXIcI7/\n+SfTvkonuWtbRn22okRy+zOBOpOKA/6K9eKNAEKBm0QkEbjGGFPX2e9yoBWwF9j
iyEFbYiWf3rxN\nXiH9FXhXRFJEpA3QDdgNpGC1Vr+LyHXA7dhiAhgJdBOR9lg3n084m2a95+ARuZr1U6dOsf/AQaYM\nH8zIAU8ycNREXG8Qcmm+Q8eOM2Pxt3zSsxPze9/Jt9sz+GHnPmYu/pbODWtR4ZKyJR3RLwm4mZTD\nVyJyAthjjMkE9gMfGWMArgVyfhSuFJFTQLYxJsIYsxQ766qc73jvAp8BLwO3Ao8AGGOCgH8AvUVk\nvzEmAWhpjGnhPK+cMSYYqC4i63KyAeUoBvxNs36+RFeOYs+evblf79qdUeBloS+oHFaOPYeO5n69\n++ARKodZg/GmjANcVjGMSiGXANDw8ips/H0vSzbt4M9TJ5mz8nu2Zh5k/fY9TOjchprRlQp8DeV0\nAnUm5X1epbAlc7eItAbSvLYdBzDGtMbOulo7s6Nj3gcTkb3ANkciGiQiOdcaA4AlIvKN1/FeEpE2\nzkdNETkOnDxLtiLF3zTr50u1Sy/l0OHDbNuxgxMnTrDwmyUkNm927icWA4lXxTL/uy0AfPf7XqLL\nhxBatgwAsRXD2LTnANl/nABg4449XBFRgdkPdeC97rfyXvdbaV2zGkM7/EUL6gII1JlUc2NMKaAS\ndl1qt4jsNMZcBjTmTI16FLBVRP4wxvwVKOXMgLx5G5iKXXPCGNMMuAFbbjmkYfXq7xpjooG+IjIQ\n2G7sNO5HoA2wrOhONQ9/06x7s+G7Hxg9YRLbd/xO6dKl+ezLBbRt3ZJql15K+7ZteGHAMzw9YCgA\nHW5oR40rSua9q+IviyauaiT3vvEfgjweBt/8Fz5I/4nylwTT7poreCihDg+89Smlg4JoUC2axldU\nKZGcgUTAGYydBfHbgFPA1cArwPXYdap1wHdAd+x6kRGRfsaYcOBz4CjwIZAAZGFnYXMd1XowduH7\nSufS7jNsAWY4L70YeAGYDtR2nvuCiHxijLnJyfGrc4xtIvJCYefhdwbjoCC2fLfunPu6herVrwRg\n08RnSjjJhVHqvgEXncE4UGdSm0Skn9fXb+fbPt77CxE5gP3NXg4TCjhmIvCxiOx3nnPjWV774fwP\niMinwKfnCq0oypkEakkVKcaYF4EbgTtLOouiXGwEXEmJyJvFcMzngeeL+riKopybQP3tnqIoAYKW\nlKIorkZLSlEUV6MlpSiKq9GSUhTF1WhJKYriagLuFoSAwuNHP0NO/smpfb+XdIrz55JQAIKuv6uE\ngyjnwo++CxRFuRjRklIUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6M3cwYY\nI6e8Svp3P+DBw6Dej1H3WpO77ctvlpHy9rsElylDh7at6XbnXwH4eP4CZr47l1KlStG7+998qlkf\nOe0N0r8XPB4Pg57oTt1raublXZJGyuy5BJcpTYfrWtLt9g6527KPHaPjw31I6nYXnW5sW9Chiy9z\n8gzSN/5gMxeosn/PUdm3zqeyd8a4BFX2/shFP5MyxrQxxswt4PGJxpgaJZHpf2VF+npHsz6Bl57t\ny4jJ03O3Wc36NGaMGcY7U15h4dI0du7OyNOsTx3L9FEv8uXiYnFEFJx33Qa2bN9B6pTRvPR0D0ZM\nzfWx2rzJrzHj5cG8M+ElFi5byc6MPbnbU2a/T3h53yvLrcp+O6kp4x2Vff4xTrFjPHmMM8Y5KvvZ\nzE5+pURV9v6KzqTOgoj0LekMF8ry1em0a+mlWT94iEOHDxMWGupo1sOIqGh9es0bNWDp6nQuCQ4m\nobHVrIeFhDC8fx/f5V27nnaJVk111RWXkXXoMIcOHyEsNMTmDbVaeIDmDeuxdM16Ot3Yll9+28am\nX7fRulkjn2XNzXwWlX1u5jDvzFZlf0nZYBIaxXuNccmo7P2Vi66kjDFlsELPK4Bs4A0gzBjzDlAf\neF9EhhljFgE9gc5AOGCAq7Caqk+MMZ2wavY
TwCoRedoYcznwDlbnXhprN95GAer14ji3jH2ZxNXK\nu1yKqJijWQ91NOtH2LJ1O7FVHc16A0eznn2MpOdeIOvQIXo+eB/NG8UXR7wC8u4nruZVeXnDK5CR\nmV8Lv4PYmGjS0r+laf06AIye/iZDej3CB/MX+iTn6ZkLUtnvK0RlXw/wVtnnjLHvVfb+ykVXUsD9\nwE4RudcY0xXr5qsNXIO9/N3MmZr1y0Skg6OmetwY8w0wGGguIseMMf80xiQCzYDPRWS4MaYhUBWr\ncf9dRLobY6KABUA9X5yot67M4/EwauDTDBo9gbDQUEezbrftz8oiecRQduzaxf19nmPB+//A4/G9\nOemMvM/0ZtDYZMJCQ6gWY7XwH85fSIPahmpV3eGzO1Nl/zSDRk0kLCyEalVjcs9p/4EskkcMYceu\n3dzf9zkW/PPNEhljf+RiLKmGwJcAIvKeMaYNsEZEjgAYYwr6P2ex8+827KwqDrgc+MxRt4djZ2bz\ngQ+MMRWxvr5lxpj7KUC97piNi5ToyEgy9mXmfr17zz4qR0bkft20QT1mJ48FYNyrs4itGu1o1ms7\nmvVLfapZj46MICPTSwu/N5PKEV5569dh9sSXbd6ZbxMbE80Xi9PY+vsuFi1fxc49ewkuU5qYqEgS\nGtUv9rwA0VER5xjjusxOfsVmnjHLUdkfc73K3s1cjAvnf3LmeZ84x3O8t3uwOvXVXjr1eBGZIyIb\nsJeM3wAjjTF/5+zq9SInsUlD5n/laNblZ6KjIvJp1od4adbTSGgUT4smDfNp1rN9pllPbNyA+V/b\nhfqNP20iOrISYSHl8vIOGJaXd/lKEhrWZ8KQfsyd9gqpyaPpfHM7krrd5bOCAmeMT1PZFzbGOSr7\n/GPsPpW9m7kYZ1IrsWr0940xt/K/XXoJcK0xJlpEdjtevhlAS+AXEfnQGLMHuAtYTsHq9SKnYd3a\nxNW6mq5JTxEU5GHokz0czXoI7Vsl0uXWm+j+9CA8Hnj0vrvyNOutW3D3408CMLhPks806w3jriGu\n5pV07f0cQR4PQ3s/yrzPFti8Lf5Clw7t6f7ci1YLf8+drvjGtir7mo7K3sPQvk/kU9nf5Kjs841x\nmxbcnfQU4NsxDgQCTrN+Lhxd+kzs5dkfwCzgDhHp7GzfIyJR+RbO94hIsjGmDpAsIm2chfOBwDFg\nLdALiMdq1g9hZ2y9gZ8oQL1+rpzVL6t2avOqr4vuxIuRGo1bAafYvPTzko5y3tRIuAGAzSu/KuEk\nF4Yn5qqLbiHroispf0FLqnjRkvIfdM6pKIqr0ZJSFMXVaEkpiuJqtKQURXE1WlKKorgaLSlFUVyN\nlpSiKK5GS0pRFFdzMf5ZjFIseKCUH/7vdPLPkk6gnAOdSSmK4mq0pBRFcTVaUoqiuBo/XERQFMXt\nOO8Y8hEwQUSS821rB7yMfaeQ/4rI8MKOpTMpRVGKFGNMKDAF5x1wC2AycCeQCNxgjKld2PG0pBRF\nKWqOAR2AHfk3GGOuBPaJyFYROQn8F7i+sIPp5Z6iBBJHDhT/G8SFhBf6nlYicgI44bz/f35igAyv\nr3djLUxnRWdSiqKUJOd8Ez+dSQUYfqdZn/oa6d85mvWej1D3Gm9l+XJS3kl18rai2x23kpb+LX1f\nGMXV1S8HoNaV1RnS+zGf5XV9Zve/0+4O7Gwqh1gKuCz0RkuqCDDGvAc8KCJHSzKHt2Z905bfGDh6\nAqkpE4A8zfq816dQsUIFHuk/hHYtm1O2bFmS35zDv2ZO5siRbKbMettnJWWV5TtInTqWTb9uZeCY\nSaROHZuXd/KrzJsxkYoVyvPIsy/QrsVfAGhSvw6TXxzgk4yBkNlNiMgWY0wFY0x1rCLuVuC+wp6j\nJVUEiEjXks4AfqhZX7Mu95v4qisuc/IWpixPJzamZKWg7s9c8jMpY0wjYBxQHfjDGNMZ+H/AZhH5\nAEgC3nV
2TxWRHws7npZUIRhjKgBzgFAgBGuEuQ7oBJwEPhaRl40xW4A62AXAqVgLzUmgC1ABq3Xf\nhHXyrRWRh4sjr19q1mt5Kctz8zrK8iPemnWbNzamCpt+3UrSoOEcyDpIj/vvIbGxb/L6a2ZfIyKr\ngTaFbP8aaH6+x9OSKpwYYKbj0WsLPIvVplfF3oj2eL79o4FeIrLWGDMMO439GGgE3I39TcY2Y0xF\nEdlPMeP3mvXn+jJozKTTNOvVYy+lx9/v4ebrWrB1x07uf2ogn70zg+AyZXye15WZ3b8mdcFoSRXO\nLmCIMaYfUBY4DMwFvsDOsGYXsP9oY0wIcKnX9p9FZCeAMWYHVste5CXld5r1/MryvfuoHFnJK29d\nZk8ebfO+9g9iY6pQpXIkHdq2BODy2KpERVRi9569VKsagy/wx8z+jt6CUDh9ge0i0gJ7HY2IJGFn\nUDHAImOMd9FPAiaJSGvgVa/H82vci2Wa4n+a9Xjmf73U5v3xZ6Ij8+V99vl8yvL6fPz5Il5PnQfY\ny9u9mfuJjor0SV6/yHzqVPF/+BidSRVOFLDe+fwOINwYM1REhgHDjDGtsGtO3vtvMsaUxd5xu9yX\nYf1Os17nWuJqXUXXnv2tZr1PEvM+/cIqy1s2p8stN9K9/1A8Hg+P3tuFSuHhXJfYlH4jxrJgSRp/\n/HGC5/s+4dNLPX/M7O+owbgQjDFNgLeArUAyMBGoDHyPVakvFZHBXgvn9wJ9sIvks5zndABeF5HG\nzjFXAZ1FZEthr+1/BmPYnLaghJOcPzWatQX8KzOA59Jahc/CD+4p/m/o8lE+XbDUmVQhiMhK4Fqv\nh/7fWfar7nw6w/nI4QPn38Ze+zZGUZTzRktKUQKJALwy0oVzRVFcjc6kFCWQ0JmUoiiKb9GZlKIE\nFDqTUhRF8Sk6k1KUQCLwJlI6k1IUxd3oTEopIk75p7LcH9XwhaG/3VMURfEtAfZjRFEudnQmpSiK\n4lN0JqUogYSuSSmKovgWnUkpSiChMylFURTfojMpRQkoAm8mpSUVYPidZn3a66R/9yMeDwzq8TB1\nr8nzBn5ZhEcEAAAgAElEQVS5JI2U2e/bvNe1oNvtt+Ruyz52jI7de5PU7S463XS9z/KC/42xv6OX\nexeIMWaiMaZGSecoCG/N+kvP9mXE5Om523I06zPGDOOdKa+wcGkaO3dnkHkgi+Q35zB76limj3qR\nLxcv813edRvYsu13UpNH81K/noxInnl63ikzmPHyEN6Z8BILl61kZ8ae3O0p77xPeIXyPsuam9nt\nY6y2GEVE+pZ0hrPhf5r19bRLbGbzXnEZWYcKUZbH12Pp6nV0uul6fvltG5t+3UrrZo18ljU3s8vH\n2BdiFV9rYwO2pM6iSH8XeA3oDPwMrMaq0H8SkfuMMZcCrwPBWEPxwyLymzHmJ2ANMB/4G9AT2IaV\nf1YADgBdgYrA206EMsD9IrLJGPMz8CGQiJWC3iIiJ4v6nP1Os56ZSVytq/LyhlcoRFm+gab16wAw\nevoshvR6lA/m+9704m9jHAgE8uVejiL9OmAAVpFeCls2TbCFsUVEmgItjTEVgeHAOBG5HquvGuIc\n60pgmIi87nX8fsBnItIS+BJoh9WvD3Ne8w3gCa/nvyUizYFKQL1iOufTOJtmveeg4bma9VOnrGZ9\nyoghjBzwFANHTvDJT+MC83p97vF4GPVsHwa9MoWeQ0dRrWo0pzjFh/MX0qC2oVrVKiWSMT/uG+NT\nPvjwLQE7k6JgRTrAChE5ZYzZBax1HtuNVZ8nAMYYMxhbaBnO9sMisjHf8RvilJiITMA+8TJgsjHm\nRWwZrXb2zRKRHMnoNue1ihy/06xHRpCxL882b5XlXnnr12H2pJE278y3ia0SzRdLlrP1910sWr6K\nnRl7CS5TmpjKUSQ0ql/seW1m/xrjQCCQZ1JnKNIdTpzlcw9wHOgiIm1Ep
KWIdHK2HS/g+H9y5vgN\nw86uWgEvnuV1cl6ryPFvzfomR1leLi/vc8Py8i5bSUKj+kwY0p+508aSmjyGzh3akdTtLp8VFPjB\nGOvCuV+RX5EefB7PSQNuB1KMMW2BGBGZc5Z9VwJtgZXGmMeAbPI06x7gNuxszGf4nWY97hqrLO/1\nLEFBQQzt/SjzPv2S8mGhtG/xF7rc0p7uz75gleX33Omz8iw0s5+NcSAQsJr1syjSSwFxInLIW3ee\n8zl2xjQLKIe9+H5ARDYbY/aISJRz3EXYhfOtzvHDgYNYxXprYCywBZiCtRk/CMzxev5cIFlEFhWW\n3/806/+fvfMOj6Jq+/C9oUkKqYQShFAfIBRDibRQJOIrlvdVQUGwC0oTEFBMpEgvglQBERQFBUXw\ns4MiiFIFQhDQoyIdhTSSQACV5PvjTJJNCBA0u5ss576uXNnZmZ397YGcfebMzLkzObjlK1dHKTDV\nW94KQHFp4yxsFWpcsQrPPHnQ4X/QtgrVnXqCz207qeKO6aQci+mk/k0G53ZS7ny4ZzBch7hf0WEO\njA0GQ5HGVFIGgzvhhsM3ppIyGAxFGlNJGQxuhamkDAaDwamYSspgcCfMmJTBYDA4F1NJFWmcPXPP\nv6Q4adazmtbmZt/TppIyGAwG52IqKYPBrTCVlMFgMDgVU0kZDO6E+xVSppIyGAxFG1NJGQzuhDm7\nZzAYDM7FVFIGg1vhfpWU6aTcDK0A/9FSgD+djwL8HUsB3j6PAvx9OwX4zc7LO+8Ndv/4MzabjZi+\nj9NQauXk3bydectW6rztW9Pzf52z152/cIG7eg2mT48u3HvbLU7LCzBx1nytWbdBzDN98rTxZua9\nZWnWO7aj533/Baw2fuc9q40fpn0r57Vxcceph3si8qCIKBGJzPN8kVWX50VEKorIAlfnyA+tAD/O\ninkzGP/8YMbNmpe9TivA5/LalLEsnf0y6zdvtVOAL2PZ3GmWAnyr8/LG7ePQ8d9ZMWsi45/ty7i5\nOVrDjIwMxs55ndfGx7B0+ljWWwqrLOYtW4mvj7fTsmZnjrXaeP4Mxj//LONmXqaN57zM+k12mvU3\nlrLs1enMnzzGaNavEWdXUlHA80qpb+2fLMrq8rwopf4AnnJ1jvy4NgV4OJt3xnJD6TK0ahbuGs16\n7B6iWkXovNWq5NGsp+HjlUezviuOe2+7RWvWjxxzkWY9lqjIVjpzaFVS09IubWN/O836jlhuKFM6\ndxs/V2z+uxcJHNJJXUZx7gt0BpqLSDLwOg5Ul4tIFNpI/CeQDNyPln8ORHvwmgDjgf8A4cAwpdSH\nInIvMMTaZodSaoiIPArcDlQGhgMzlVLNRORWYALawbdcKTVDRHpYn/cisE8p1dt6fRugPCDA1Dw2\n5ELh2hTgcUTcpEXK586fp8/wUZYCvKcTNeun82jWfYlPPm1p1stx9pydZj1uLxGNwgCYvGAJI/o/\nyeovNzglZ67MScmESZ42TrRr4/Rzuds4PKuNL+g2Tkuj/2MP0bKZg9rYnN0rMJcozpVSXwJfAC8o\npb7B8epyf+BBpVQ7IBW4zXr+JqAn8DQwCa2cehp4VES8gReBW6zX3Sgira3XVQXaAscBLLfeq+iO\ntzUQJSJl0R3zf5RSrYG6ItLQen1D4F6012/ANbXmP+RSBfhQYiZPp3/MGKpUqkhmZiaZmZmcTk1j\n9riRTHxhCNETp7tOs54377ABxEx7lf6jp1CloqVZ/3JDEdOs5zzObuNJ0+gf/ZLVxvaa9ZFMjB5K\n9MRpLmvj4oijDvcupzi3x9Hq8njgdREpie7Qvkb78eKUUhdE5HfgZ6XUWUu57guEoTujNSKC9Vw1\na3/fW3r2rP2XB84rpbJU7HdaeZOA/7O2qwcEWuu3KKUuiogDNesBxCclZS/nrwCfBsC0BYsJqVTB\nUoDXKzqa9QD/nLyNw1j2yjidd9FSS
7O+LUeznpBI6VKlqFg+kFZNnKRZD8qrWU+kfJBdG4c3Ytnc\n6Trz/MWEVKzA+QsXnKhZd7/Oz1GV1OUU5/Y4Wl2+GOhvVUT/d5nX5adZ32lp1tsrpcLtDMZ5816S\nVURKA3OBB6z33XaF9yp0WjdvaqcA/yUfBfiLRUuz3rQxa7/Vg8j7fvntUs169DgSk1N03q07aNWk\nEa+8OISVc6ewYvYkutweRZ8eXZzWQYGlWd+gh1R1GwfmbuOhMXZtvJVWzcJpE9GUbbt2u6SN3QFH\nVVL/RHEOhasu9wWOiIgf0MEuz5VQQD0RCVZKnbKqt9fy3VCpRBEpISIhwAngY+AR4G+l1B9WBdiM\ngn/2f41WgNemW5/BlgK8P6s+X4uPl5edAjxaa8t7PJBHAa4Hc52uWa9dg24Do/Gw2Rg5oBer1nyt\n87a5ma63R/HE8DE6b7d7i8QfdpOGYYRJbbr1GYSHzYORz/Zj1WdrtRq+bWu63nU7Tzz7gs7cs1tO\nG7eP5IGn9UmJFwf1dVwbu+FhpKM6qbeAt0SkK1px3l1EHivA62Zar9tAjro8Ea0sP2T9fk1EOhVg\nX3OBTcDPwBRgNBB9pRcopdJFZBDwmYhcAGLRHdDl6AustB6/Z3VcX4rI90Cc9b6voBXvTmHI04/n\nWq5bq0b2407t2tCpXZtLXtPtv3fQ7b93ODxbfgx58qFcy3VrhmY/7hTZgk6RLS772gEPP+CoWFdk\nyNNP5FquWytn8L8otnFxx2jWiyhas/7t1TcsAlRvFglkcnDTGldHKTDV2/wHgIM7vnNxkmvDFhx6\nZc36oTjHa9ZDGzt1ylhz757BYCjSmNtiDAZ3wg2PjEwlZTAYijSmkjIY3ImMjKtvU8wwlZTBYCjS\nmErKYHAnzJiUwWAwOBdTSRkM7kSmGZMyGAwGp2IqKUMhYYNSZVwdouBkjd38fcG1OQobMyZlMBgM\nzsVUUgaDO2HGpAwGg8G5mErKYHAjnDGriVOnQMBUUgaDoYhjKimDwZ0w9+4ZDAaDczGVlMHgTrjh\n2T3TSbkZE2cvYPf+H7FhI+aZp2lYL1vBxbpvtzDv7XcoXaoUnW9pT8/77gbg47Vf8/q771OiRAme\neeIh2re82Xl55yxk9/6fsNlsxAzoTcO6dXLyfreVeW+vsPJG0vPeu9gWu4dBoydRK7QqAHVqhDJi\n4NNOywswce7r7N6vdOb+vWhYN0cWuu67rcxb+l5O5nvuZNvuHxg0erJd5mqMeMZBEmw3vJjTdFLX\niGUjTlFKrXZ1lrxs372HQ8eOs2LeDA4cOkL05OmsmKcdEBkZGYydMZdVi+bgV64cvYa9SFRkS8qU\nKcOcN5fxweuzSU8/x+w3ljqtk9q++wcOHTvBilenceDwUaInz2DFq9Ny8s6cz6qFM/Er50Ov50cR\n1UYr5Js3bsCsMVd0ajgw816dee5UnXnKLFbMnZqTedZrrHrtFSvzS0S1aZGT+aXhLslc3DGd1DWi\nlHrT1Rkux9adu4mK1H/INUOrkpp2hjNnz+Lt5UVySio+3t4E+GkhZcum4WzeGcsNpcvQqlk43p6e\neHt6MnbYQOfl3RWX/Udcs9qNpKad5czZdLy9PK28Wl0O0LJJYzbv3E1IxWCn5cuPSzOfuULmRmze\nGefczOZwr/hiVUDt0B6/MCAG6A7UB3qgHXkPAhnAh0qpaSKyCPhcKbVSRF4HvgLqAglKqTkiMhO4\nGS3+fBr4CVgCVEHr1kcrpT6xFF1fop2CQcBdSqkjhf0Z45OSCauTc+gR4OdLfFIy3l76D+fsuXQO\nHT1OSKUKbIuNI+ImbaY/d/48fYaPIvXMGfo/1pOWTcMLO9oV8tayy1vOyuup86af49Cx44RUrMC2\n2B+IuKkhIRWDOXD4KH2ix5CSeoZ+j3andTPn5M3JnKOwymlj+8wnCKkYzLbdeTLHjCMlNY1+j3Rz\na
ubiznXTSVnUBiKBJ4EXgHDgUbSPrxyQJUzbJCLvA8+hHXxHgBCl1HIRGQ0gIlHAjUqpFiLSFngA\n7QVcq5RaIiI1gPeBT6x9piqlOorIJOBenODis7+wz2azMSl6KDGTp+Pt5UWVShWz159OTWPOuJGc\nOHmSRwY+z9fvv4XN5uxL9nIPp9hsNia9MJiYyTOtvBXIzMwktEpl+j3Snds7RHL0xB88MjiaNcte\no3SpUk7PqzPnaePhg4iZMgtvL0+qVLQyh1Sm38PduL1DG5352RjWLF3gmMxuOCZ1vV2CsEMplQn8\nDuxRSl0ETgKN0B3YeuvHBwhVSiWiDcYfAwPy7KsJWj6KUmqjUmoEkAw0F5FN6Ioq0G77LIneMbRd\nudAJDgwgPikpe/lUQhLlAwOylyNuasSyOdNYMHkM3l6ehFSqQGCAP+EN6lGyZAmqhlTGy7MsSadT\nHBHvMnmTc/ImJlI+0N8ub0OWzZ7CgkmjdN6KwVQoH0TnW9pis9moGlKJoAA/TiUkOiUvQHBQAPFJ\np+0yJ+XJ3IBlsyaxYOJIvL2zMgfS+ZZIu8z+Ts1c3LneOqm/L/M4APhUKdXe+mmolNporasInAHy\nDixc5NL2e9DaVyRaL3+593ZImdK6eVPWfqNll/vULwQHBeDt6Zm9vtewF0lMPk36ufNs2LyNVk3D\nadO8Cdt2xZGRkUFySirp5847TWfeunkT1n6zSef9+VeCAwNz531ulF3e7bRqehMff7meRctXARCf\nmExi8mmCgwLz3b9DMjcLZ+3GrMwHCA7M08bPj7bL/L2VeQOLVujzLPFJDs6cmeH4HydzvR3uXY6d\nQAcR8QTOoQ/FhqM7qE5AR2CFiLS2e8331jZTRSQcfQj5G3BQKZUhIvcCpZ34GWjSsD5hdWrTrc9g\nPDxsjBzcn1Wfr8XHy4tb27am653/4Ykh0dhsNnr3eAB/a4C3U7s2PPD0IABeHNgHDw/nfHc1aVCP\nsDq16NZvKB42D0YOeppVn3+Fj7cnt0a2ouudt/HE0BFW3q74+/nSofXNDB07la83beWvv/9m1OC+\nTj3Uy87c/zk8bDZGDnyaVV+sw8fLk1sjW9L1jk48MWwUNhv0frAL/r7l6NA6gqHjpvH1pm389dff\njBrUx2WHp8WR60azbg2cN1BKDRWRO4EuSqlHsx4D24HH0RXSh0qpiSLyCTBBKbVZRMYAaegB8ayB\n82lAhPUWfa31HwHxwGJgIHpM6hagv1Jqr4j0B4KUUqOvlLf4adbh4Pb1Lk5ScKpHtAfg4LavXRvk\nGrFVlitW4RmxXzn8D9ojPMqpA5bXTSdV3DCdlGMxndQ/x9mdlDncMxjcCTe8Tup6Gzg3GAzFDFNJ\nGQzuhBsO35hOymAwFCoi8grQAsgEBiqlvrdb91/gReACsFwpNedq+zOHewaDO+Hi66REpB1QWynV\nEngCmGW3zgOYA3QG2gJ3iUiVq30k00kZDIbCpCPwIYBS6kfAX0Syrg4OAk4rpeKVUhnAOiDqajs0\nh3sGgzvh+jGpiuiLo7OIt55LtR77iEht4BDQAdhwtR2aSspgMDiS7GuqrPtmH0Ff6LwaOEgBbhEz\nlVRRxQY46faUf40NuHiRzORTrk5yDVh/GyWceueS43H9dVIn0JVTFpXRN/QDoJT6Bn1vKyIyEV1R\nXZFi8ldgMBiKCWvRt5khIk2AE0qptKyVIvK5iASLiBdwF3qOtitiKimDwZ3IcO2YlHWf604R2Yye\nQLJfnim3F6I7skxgolIq4Wr7NJ2UwWAoVJRSeSdzj7NbtwpYdS37M52UweBOuH5MqtAxY1IGg6FI\nYyopg8GdMJWUwWAwOBdTSRkMboQ7TmJpOik3Y+KseezeZ2nLB/bJo1nfzLwllmY9qj097/svKz/5\nnP/7Yl32NvvUz+z68iPn5V38LnHqN2w2iH7iQRrWrp6Td1ss81d
+TOmSJekceTM9OncEYOqS99i5\n/xcuZlyk17130KllU6flheKnsi/uFKtOSkTuU0p94Oocl8N+7nRXvP/22D1aAb5gptasT5zGigUz\nAUsB/socVi16FT/fcvQaGkNUZCu63Hk7Xe68Pfv1X3z9jfPy7lUcPnGS5ZNjOHD0BDFz3mD55Jjs\nvOMWLuWDaaPw8/Gm99hX6BgRzuHfT/LLkeMsnxxDcuoZ7hsy2qmdVJFX2ZsxKdchIqFo47DhMmzd\nGUtUZCsgS7OexpmzZwFITknRmnV/Pzw8PLRmfUdsrte/+uZS+jzaw3l59+yn481NdN4bK5N69ixn\n0s/pvKln8PHyJMC3HB4eHrRoVJ8te/bTrL4wY1hfAMp5eZJ+/gIXLzrvD/NyKnsgl8o+u413xrJl\nR2y2yj44KNCpKnt3oDhVUnOBCBHJAJYC1dHTPCymAFpzIAV4Dyhj/fQDfrD2VQ3YDNyvlKoiIvXR\n895kog0wjwJ+aOHnAaAxEKuUelJEGgJvAUnWOgBEpB+XattHAzWs7O0tOWmhEZ+YRJjYa9b9iE/M\n0qz7aQV4lmZ9124iwhtnb/vDj4qKweVzyUQdTcLpFMJqhubkLedDfHIK3p5lCfD14ey58xw6cZKQ\n4EC2//AjzRvUpUQJDzxLlAHgg3Ubadu0ESVKOO+7tsir7M2YlEuZCvQH9gJ1lVKRIhJMwbXmR4Bj\nSqknrG3rAP8BbrBU6XcCg6zXzgaeUkr9IiJ90R3aMqApWqd+CjgmIn7ACHTn+H8iMg9ARKqj71/K\nq20HKK2UinREA+XlEgV4zDBiJk7D2zu3Zh3g/Y8/557bOzkj1mXJm3fiM0/y4pzFeHuWJaRC+Vx/\ngOu2xfLBV9/y+qghroiaTXFT2RdHis3hXh62W7+vRWu+BWgpIvOBWkqpL4B6WKp04DNyLMMRwEKr\nInsIqGA9/6tS6g9rwq4T1n7ro6swyJkbJ4J8tO15shc6wUGBxCfaa9YTKR9kp1kPb8SyV6ezYMpY\nvL29CKlUIXvd9tg9hDes76ho+ecN8CPBTul+Kvk0wQE5BvqIBsLSCS8w/8VB+HiWpXJwEADfxe5l\nwcpPWDBiMD5enpfs16GZi7rK3g0NxsW1k/rT+l1grblS6nf0YdoqoI+IjETP15HV6pnWD0A60MFS\nrrdUSj2Tzz6xXm+/j6z2/JPLa9v/xEG0jmjK2g26b9aa9Tza8iHRJCYnk37uHBs2baVVMz0edDIh\nEc+yNzjdqtv6pgas2bJD5z1wmGB/P7zKls1e33vMdBJPp5J+/gLrv4+jVeP6pJ1NZ+qS95gXMxA/\nH2+n5oXip7J3B4rT4V4Gl+YNooBacxGJAkoppT4Xkf3Aq+ixpC7WJp3s9h+HPhT8XES6oWcUPED+\nKKAZsAY90yDomQkn56NtdyhNGoYRJnXo9vQgrQB/tj+rPlurFeDt2tD17s48MfgFrS1/qFu2Zj0+\nIZFAfz9Hx7uE8Lq1CKtRje7Dx+NhszGid09Wf/0d3p5lubVFU7rc2o4nX5qm897XGf9yPry3dgPJ\nqWkMfnle9n4mDXySyuUDr/BOhUeRV9m7eBYER1BsDMYiUh79x/8BcMDSnIdSQK058CZ6kPxvdIc3\nCvgeXVmVQx+q9VZKBYtIPeA1a7tz6IqtHLBSKdXMyrMD3cEFAG8Ax4HfAG9L396XS7Xto7EU7Vf7\nvKFVq2Qe3LnpapsVCao3bQ0XL/LblytdHaXA1LjtAQCKiyU6C1uF6lccyLq4YbnD/6BLtO9mNOvO\nQkQC0Id1H4hICLBOKVXX1bnAdFKOxm07qfXvOL6T6vCg0aw7kTTgfhEZhh5PGuziPAaDIQ/XdSel\nlPoLfUmBweAeuOGRUXE9u2cwGK4TrutKymBwO8y9ewaDweBcTCVlMLgTZkzKYDAYnIuppAwGdyLD\n/cakTCdVZMm6LbA4YIMSJfG
oWP3qmxYVsgaYzyS7Nse1UqEYtXEhYTopg8GdMGNSBoPB4FxMJWUw\nuBPmOimDwWBwLqaSMhjcCTcckzKdlMHgTrjhJQjmcM9gMBRpTCVlMLgT5nDPUNTRmvUfLc1633w0\n68vsNOv/szTrX2VvozXrHzst74QZc4nbtx8bNqIH96dR/ZyJUb/a+B3z3lxK6VKluCPqFnp2vYez\n6ed4fsxEUtLS+OvPv+j3xMNEtohwWl6Aia+9xe6ffsVmg5inHqFhnZrZ69Zt2cG85at1G7drSc+7\nbuPc+Qu8MH0eCadT+PPPv+jT/V46WFJUw9Upkp2UiFQEXlJKPZXn+ZeBvUqpN+2e87aeCxWRQ0AD\npdQZJ2TsopQqUvPlbo+N0wrwBbM4cOiwpVmfBeSnWY8mKrJ1Hs16nHM167t2c/joMVYsnKvzjp/C\nioVzc/JOm8XqN1/TeZ99nqh2bfjqm++oXvVGhvTtxcn4BB7p/yxfrHjLeZl/2M+h43+wYvoYDhw5\nTvSMBayYPiYn87w3WDV7In4+3vQaOZmols3Yte9nGtSuwZNd7+b4yXgej5nguE7KXILgHCy33VNX\n39KlONz+cq3k1qxXy6MAT8HH2yuPZn1XrtdrzXpPp+XdsmMXUe3aZOdNSbXTwp9OoZydFr5FsyZs\n/n4n/n6+nE7VzrrUtLRsG4uz2Lp7H1Etm+nMVUNIPXOWM+npOnNqGj5eXtlq+JaNw9gcu5fO7Vry\nZNe7AfgjIZGKQc6zRLsDLqmkRORRoB3a4hIGxADd0aLNHsBJLDOLiPQEnkdLPs8Be0WkHNoacwPw\nXT77rwwsQiuuLgJPKqWO5NlmPNrXVwKYo5R6V0Qao3Xuf6FNMV3R86AvBSqh9eyjgIZAYxFZpZS6\n9zL7ehPt2AsEPkbbjMsDAkxVSi36N22YH/GJyYRJnezlAD/ffDTrxwipVJFtu+KICG+Uva1LNOtJ\nSYTVtcvr70d8YpLO6+/H2fT0nLw7dxPR5CZ6P9SdVZ9+wa1depCadoYF0yY6LS9AfPJpwmrl3D8X\n4OtDfFIK3p6eBPiW4+y5cxw6/jshFcqzbc9+IhrlCFe7DRnJyYQk5o8e5riAbqi0cmUlVRu4G5gI\nvICWe05Ed1YAiIgNmAB0tLatZa3qiT7EiwR257PvscA0pVRHtPNuhP1KEYkEqiml2qL1Vy+KSFkg\nGBiglOqANhv3QHdIQda2twEBSqmpQIrVQV1uXwBJSqn7rMcN0br3/wEDrrm1/gGX06z3jx5tKcBz\nttWa9ducEeuyXJJ3xHCix0+h//MjqFK5ImRm8n9ffEnlisF8uXIZS+ZMY8y0mS5MnHuc2mazMWlI\nH2JmLKD/2OlUqVg+12daPm0Mr44cyrCpc7meLU3Xiis7qR1KqUzgd2CPUuoiuoKyr98DgTSl1ClL\nmpDleMpPbW5PK2C0pUl/gdz69az1Laz1a9DtUMl6/wki8g26swwEfgJ8RORtdCe0vID7gtxK9S3W\nZ8zSvhc6V9esN2bZq6+wYMq4fDTrcc7XrAcFkZA3b2DOP1VEk5t4Z/4sFkybiI+XNyGVKrJrz17a\n3NwcgLq1a3EqIZGLFy86L3OAP/HJp3MyJyVTPiBHrBrRsD7Lpo5mwUvP4e3pSUiF8uz95Td+j08E\noF7NUC5ezCApJdUxAY1mvVD5+zKPbXke27eKRz7P5/cZ/gS6WorzSKXUvfmsX2SnQa+nlPoNmAnM\nVEq1AxYAKKXSgRbWcmfg9QLuK2vd1T5jofHPNesJeJYt63zNekQz1qzfaOX9Wef1ysn75ODnSUzS\neddv2kzL5k2pViWEuH0/AnD89z/wKluWEiVKOC9zk0as/W6bzvzrQYID/PH2zFHD9xoxicTTKaSf\nP8+G7btodVMDduz9iTdWfQJAQvJp0s+fx7+cj9MyF3eK5Nk9OxIBXxHxA84CrYEt5KjNPyBHb
W7P\nNvRh1TwRuQWoqJR6J8/6l0VkMnrcaqpSagB6jOyAiJRBd0hbRaQJUF8ptVREtgFZNkmPq+zL6WjN\nem26PT3Q0qwPYNVna7QCvF0but59O08MHp6PZj3JJZr1Jo0aEFa3Dt169cfmYWPU0IGs+vQLnbd9\nJPf/9w4eHzQMGzZ6P/wgAX6+PPC/u4geP5mefQby98WLjH7OuarEJvXrEFa7Bt2GjMTD5sHIvo+x\n6stvtMq+VXO6/ucWnnhxIjag9/3/xd+3HN06RxEzYwE9ho3m/IU/GdH3Mcdp1t3wMLJId1JKqQxL\nTQ5QHbEAACAASURBVP4NcAjYa616C1gtIuvQA+d5/2VGA2+ISHdr3aN59rtZRNajOzwb8Kq1ajbw\nIXDAejwHfQjXU0SeQg/CT7W2jRWR7UqpiMvsyyUM6fNkruW6tXOu4enULpJO7SIveU2DunVYOG2C\nw7Plx9C+vXMt161dK/txp/Zt6dS+ba71Xp5lmTl+tDOiXZYhj3XPtVy3RrXsx51aR9Cpde7rtm4o\nU5ppz7vke8stuK4160WZ0Ko3Fi/NOnBo9zYXJyk4oY30ZQQHv/nExUmuDVvNJlfWrK+a6XjN+r0D\nnTplbJG8TspgMBiyKNKHewaD4Rox10kZDAaDczGVlMHgTph79wwGg8G5mErKYHAn3PBsvamkDAZD\nkcZUUgaDO+GGZ/dMJ1WEsdmKi2YdyMwk889zrk5RcLLa9gYv1+YwXBXTSRkM7oQ5u2cwGAzOxVRS\nBoM7Yc7uGQwGg3MxlZTB4E6YMSmDwWBwLqaSMhjcCTe8TspUUgaDoUhjKik3Y8LMV4mzNOvRg/rS\nqJ6dtvzbTcx7cxmlS5fijo4d6Nnlf7z/8ed8tObL7G32/vQzsV85b7bKiXNeY/e+n7QWfsBTNKyX\n4+Fb990W5r21nNKlS9H5lnb0vPcuAD7+cj2vv7uSEiVK8MzjPWnf0sma9bmvs/tHhQ0bMf170bBu\n7ZzMm7Yyb+l7WrPeIZKe99zJtt0/MOilydQKrQpAnerVGPGMg9y3bjgmZTqpAiIi9ymlPnB1jiux\nPTaOw8eOs+K12VpbPuFlVrw2G7AU4NPnsHrxPK0tH/ICUW1b0/Wu2+l6V45m/XNnatZ3/6C18POm\nc+DQEaInz2DFvOk5eWfMY9Xrs/Er50Ov50YS1aYlZcqUZs6by/hg4SzSz51n9uKlTu2ktsft5dDx\nE6yYM5UDh48SPXUWK+ZMzck86zVWLXhFZx7+ElFtWgDQvHEDZo0uctLrYoE53CsAIhKKnbS0qLJl\nRyxRkXq+8Zqh1UjJo1kvZ6dZ19ry3Jr1uW8spa8TNetbd+4mqk1LK29VUs+c4cxZS1mekqq18H6+\nWlnepDGbd8ayZWcsrZqG4+3pSXBgAGOHPeO0vABbd8UR1Vp3PDWr3Wip7C+XuRGbd8Y5NR+ZmY7/\ncTKmkioYc4EIEclAK9erA1HAYqAK4AWMVkp9YklCv0SLRIOAu4AU4D20pr0M0E8ptSvvm/xbtLY8\n59Djypr13USEN87eds+PP1HJyZr1+KRkwiTHDhPg60t8UhLeXp4E+PlqZfmx44RUrMC22D3ZWvhz\nFy7Q54WXSE07Q//HetCy6U3OzVwnx8AT4OdLfFJyTub0cxw6doKQisFs2/0DEY0bElIxmAOHj9In\nZhwpaWn0e7gbrZuFOy1zccd0UgVjKtAfrdSqq5SKFJFgYK1SaomI1ADeB7IGc1KVUh1FZBJarX4E\nOKaUesLatk4+71HoZNqZvmw2G5NefI7oCS/j4+1FlUoVc30rrvz4c+7p3MkZsS7LJXlfGELMpBl4\ne3taWni9/nRKKnPGjeDEyVM8Mmg4X7/3pstuxr5EDT98EDFTZ+Ht5UmVihXIzMwkNKQy/R7uxu3t\n23D0xB88MiSGNW8vcIyMNcP9xqTM4d61k6VOTwaai8gmY
Am5Ve5ZAtEspfoWoKWIzAdqKaW+cESw\n4KBAEhKTs5e1tjy3Zv2deTNYMHU8Pl5ehFSqmL1u2644whuGOSLWFfIGEJ9knzcpd96bGrJszlQW\nTHoJb29PQipWINDfj/AG9ShZsgRVQyrh5VmWpNMpzsscGEB8kp1mPTGJ8oH+OZkbN2DZzEksmDAS\nby9PQioGU6F8IJ07RGKz2agaUokgf39OJSQ6LXNxx3RS106WOv1BIACIBO7Js00upbpS6negMbAK\n6CMiIx0RLLe2/JdLteVDXsjWrK/ftJWWWZr1+AS8PF2gWW/ehLUbvtN5f/6V4KCA3Fr4YSNITD5N\n+rnzbNi8nVZNb6JN8yZs2xVHRkYGySmppJ87h79vOedlbhbO2o2brMwHCA7Mk3n46JzMW76nVdOb\n+PirDSxasRrQh4uJyacJDgrMd///GjMmdd2SwaVtFQQctCzL96IV6/kiIlFAKaXU5yKyHwdZjps0\nDCOsbm26PfWM1pY/+wyrPl2Dj7fWrN9/V2ceHzQcmw16P9SdgCzNemISAa7QrDeor7XwfYfg4WFj\n5KC+rPr8S61Zb9uKrnf9hyeGvqiV5T3uz9bCd2rfhgf6PAvAiwP7OE5Znm/meoTVrkW3/s/pzAOf\nZtUX67RmPbIlXTt34onnRuk2frAL/r7l6NAqgqHjpvH15m389dffjBrUx+lfCMUZYzAuACJSHtgJ\nfAAcUErNsc74fQTEowfQB6LHpG4B+iul9opIf3Rn9iZ6wP1vdIc3Sin1bd73sSe06o2Zh3ZtdswH\nKmRCm7SCzEwOfu+8yxf+LdWba337wa1fuzjJtWELkSsbjBeNdLzB+IkxTh0ANJVUAVBKxQNV8zx3\nCGhk99Qy6/cYu23m2K1v46h8BoM7Yzopg8GdcMMjIzNwbjAYijSmkjIY3AlznZTBYDA4F1NJGQzu\nhBmTMhgMBudiKimDwZ1ww/mkTCVlMBiKNKaSKsoUJ836xb/IPKpcnaLg2PT3s+0GbxcHKWTMHOcG\ng8HgXEwlZTC4E244JmU6KYPBnTCXIBgMBoNzMZWUweBOmErKYDAYnIuppAwGd8LcYGwwGAzOxVRS\nbsaEma8St3e/pVnvR6P6dpr1jXaa9agszfpnfPTFV9nb7P1JEbvuU1dEZ+LS1cT9elhn73kPDWvm\nTIa6bucPzP/wS0qXKknnFuH06BTpkowAE2bOJW7vj9hsED2ofz5tvNSuje+x2theZa+IXfeZY8K5\n4ZjUdd9JicijQAOl1NACbj8aSMgzNTAi8n9Kqf8WfsKCsz02jsNHj7Fi4RytWR8/lRULdUytWZ/N\n6jfma836s1ma9c50vatz9us/X7fBNdl//JXDfySwfPQgDhw/SczCd1k+elB29nFLVvHBuCH4eXvS\ne+prdGzakIqBzpdH6DY+foU2nsXqNxZYbTycqLZtikwbF1fM4V4h4eoOCmDLjl1Etb2MZv10CuW8\nve006+Fs/n5nrtfPXfw2fR97yOm5Abbu+4WOTRsCUDOkAqlnz3Em/TwAyWln8fEsS0A5b509rDZb\n9v3skpyXtnHaFdq4ST5t/JZj29gordwXEemHdullAB8qpaaJSDhaP3XB+nnA2ry5iKwFKgNDlVJf\niEiCUirI0leNRfv5koH7gVZoA3IGUA9YqZR6qbA/Q0JiMmGSI0cO8PclPjFJa9b9/Tibnn55zfr+\nn6hUwbmadXsSUlIJq14leznAx4v4lFS8PW8goJw3Z8+f59Af8YQEBbB9/680r1frCntzYM7EpDxt\n7HeNbRzssjYurphOSlMdaEqO0WWTiLwPPAa8qpR6W0RuAbKUv8FKqU4i0gBtL7Y3EvsDDyqlDorI\nW8BtQBoQAdRFV6+HgELvpPJi/6Vns9mYNOJ5oidMxcfLiyqVKmFnNWflx59xT+fbHB2pwNh/X9ts\nNiY+9SAvvvYu3p5lC
SkfWGTGXi7RrI8YbtfGeVX2TmjjItIuhYnppDRNgFLAemvZBwgF/g+YJyJ1\ngBVKqZ9EBGADgOXWuzHPvuKB10WkJFAD+BrdSe1SSqUDWPsodIKDAklISspe1pr1HFOu1qzPBGDa\nvNcJqVQhe9222DhefHaAQ3IVhGA/XxJOp2Uvn0pOIdgvx0wcUa8WS0c+A8D0FZ9QubxrqpFra+OF\nuVX2sbtd2sbOQkReAVqgv2sGKqW+t1vXD+gJXAR2KKUGXW1/ZkxKkwF8qpRqb/00VEptVEqtA5oD\nPwFLRKSDtb3911Xer67FaDloO3Qnl8XfOJjWN9tr1n++VLP+7HASkyzN+ndbaNm8KWBp1ss6X7Nu\nT+uGwprv4wDYd/Aowf6+eJW9IXt97ykLSExJI/38BdbH7qNVgzqX25Vjcxb1Ns7IcPzPFRCRdkBt\npVRL4Alglt26csAwIFIp1QaoLyItrvaRTCWl+QboICKewDlgBjAc3cifKqWWiYgNCLe2bwNMEZFG\nwOE8+/IFjoiIH9AB2OOMDwCWZl3q0K33AGweHowa8gyrPv0CH29vrVm/+w4eH/w8Nmz0ftj1mnV7\nwutUJyy0Ct1fmomHzcaIR+5j9cbteJe9gVubN6JLh5Y8OXk+NpuN3ndF4e/jmnmgmjRsoNXwvftb\nbTzQamMvbm0Xyf13d+bxwc9Zbfxgnjb2d0lmJ9MR+BBAKfWjiPiLSDmlVCp6nPZPwFtEzgCeQNLl\nd6UxnZQmCd0xbUSXoR8qpc6JyK/A+yKSgh44fwzoA5wSkY/Qh3MD8+xrLrAJ+BmYAowGop3xIQCG\n9u2Va7lu7ZrZjzu1j6RT+0uvL2pQtw6vT5/k8GxXY0i3u3It160Wkv24U/NGdGreKO9LXMLQvr1z\nLedu47Z0at/2ktc4rY1dPyZVEbA/pRlvPZeqlDovIi8Bv6GLgeVKqauepr3uOyml1Jt2i6/mWfcF\nuQfFQXc6+e0nyPo9Ehhpt2qJ9fvdvNsaDNcB2dPLWod70UAdIBX4WkQaK6XirrSD676TMhjcCtdX\nUifIOQsO+jKd363H9YDflFIJACLyLfqs+hU7KTNwbjAYCpO1QBcAEWkCnFBKZZ22PQTUE5Gy1nIz\n4Jer7dBUUgaDO+HiWRCUUptFZKeIbEafNe9n3XqWopRaLSJTgfUi8jewWSn17dX2aTopg8FQqCil\nhud5Ks5u3QJgwbXsz3RSBoM74foxqULHjEkZDIYijamkDAZ3wlRSBoPB4FxMJWUoHEqVwUOauzpF\nwbmob6XMOHnItTmuEY/AkCtvYCopg8FgcC6mkjIY3AljizEYDAbnYiopg8GdMGNSBoPB4FxMJWUw\nuBOmkjIYDAbnYiopg8GdcMNKynRSbkZx06xPmD6LuL37sGEjeshAGoXVy8n7zbfMW7RE5+0URc/7\n7yMjI4NRE6fyy4GDlCpVktEvDKNmaDWn5QWYuPhd4tRvWrP+xIM0rF09e926bbHMX/kxpUuWpHPk\nzfTo3BGAqUveY+f+X7iYcZFe995Bp5ZNnZq5OGM6KTtE5D6l1AcF3HYGMFMpdfAy6w+h9e1n/ul7\nXCvFTbO+fWeszrt4AQcOHiJ67ERWLF6Qk3fKK6xeugg/X196DRxKVLtIftj/I2lnzrJ88XyOHDvO\n+GkzWfDKFOdl3qs4fOIkyyfHcODoCWLmvMHyyTHZmcctXMoH00bh5+NN77Gv0DEinMO/n+SXI8dZ\nPjmG5NQz3DdktMM6qUxznZT7IiKhQPeCbq+UGnS5Dqqw3uNaKW6a9S3f7ySqnRZD1KweSkpqGmfO\n2OX18SbA31/nbd6Uzdt3cOjosexqq2qVEE78/gcXL150Wuate/bT8eYmOvONlUk9e5Yz6ed05tQz\n+Hh5EuBbTmduVJ8te/bTrL4wY1hfAMp5eZJ+/gIXL7pfZ+IorttKSkSqAkvRdpiSaC9
eAxEZie68\na6DNxlFol14VwAsYrZT6REQ2oNXpp4H30aqejWinWHvrbfqLSGdr/7ehTTIRIjJSKTWmsD9TcdOs\nJyQmElYvR5SqleWJeHvb5T1ylJDKldi2YxcRTcORWjVZ8u57PNL9fg4fPc7R4ydIPp1CkJNyJ5xO\nIaxmaE7mcj7EJ6fg7VmWAF8fzp47z6ETJwkJDmT7Dz/SvEFdSpTwwLNEGQA+WLeRtk0bUaKEg+oD\nNxyTup4rqS7Al0qpDmgt1RrgG7vOo7RSKhLt0VtryT7v51I9+mDgPWt9mTzr9iql2qLdfB2BqXne\nw6FcTrPef/jIIqlZv0RZPiqG6LET6T8smiqVK0FmJu1at6RhWD169O7PkuXvUaN6tVyvc3Xmic88\nyYtzFjNg0hxCKpTP9Y+wblssH3z1LSN69XBF1GLLdVtJoSeMX21JPFcCW9ETw2ex3fqdDDQXkd7o\nOZsDyU09YIX1+CMgwm7dd9bv4+jO7nShpc+H4qZZDy4fREJiYvbyqfgEygfl2L4imobzzkJtGZs2\nZz4hlSsBMLhPjvcu6n/3ExjgPOlmcIAfCadTspdPJZ8mOMA3ezmigbB0wgsATH97JZWD9ef5LnYv\nC1Z+wmsjB+NjZzwudEwl5T4opfYCjYFvgYlA1Tyb/Gn9fhAIACKBe/LZlQ3decGlyvW/82znUIq8\nAvySvBGssQbq9/2kCC4flDvvM0Ny8n67iZYRzfjp5194YcwEADZu3kr9unXw8HDef+PWNzVgzZYd\nOvOBwwT7++FVtmz2+t5jppN4OlXr4L+Po1Xj+qSdTWfqkveYFzMQPxeZl4sz120lJSLd0A6wD0Uk\nARiDFhbmJQg4qJTKEJF7gdJ51h9AV2A7gNuv8rYZOLDNi5tmvUnjhoTVFbo9/jQ2DxujnnuWVR9/\nppXlHdpx///u5vH+g7Va/dGHCPDzw69cOTIzMunySC/KlCnNy2NGXv2NCpHwurUIq1GN7sPHax18\n756s/vo7vD3LcmuLpnS5tR1PvjRNZ76vM/7lfHhv7QaSU9MY/PK87P1MGvgklcvnLcoLATespGyu\nPJ53JZYTbD5wBj14PgZYBnwApAAJSqk51hm5j9C66MXo8atPgFvQA+cXgPeARGAb0EIp1dH+EgQR\neRnYC3yKVlB/oJQafKV8oVVvzDwUu6UwP7LDCA1vCTYbh37YefWNiwih9bSy/bcvV7o4ybXhUb/1\nFSvyv2N6OPwPuuT4ZQ4/Ksj1fs58s6KEUmoXuceP4NJDPpRSh4BGdk8ts36PARCRMKC/UmqTiHQH\nyluvC7Xbx9ArvYfBUGi44XVS120nVYikAQtEJBN9OPeYi/MYDG6F6aT+JUqpI0AbV+cwGAC3HJO6\naiclIt7Ao0B99NmrPcBbSqlzjo1mMBgMBauklgNJwCb0afRI9Fms/zkwl8Fg+Cdcj5UU4K+UutNu\neb6IfOuoQAaDwWBPQa6COygiFbMWRKQC8IvjIhkMhn9MZqbjf5xMQSqpasABEdmH7tTqAvtFZCOA\ndW+awWAwOISCdFIvOjyFwWAoHK7T66RK5PekUurrQs5iMBj+LdfpwPkIu8elgTD0mT7TSTmaks69\n4fdf45Hv91kRxfpjvviXa2MYrspVOylrvqVsRCQYPWuAwWAoarhhJXXNc1wopU6h51AyGAwGh1OQ\nK87fJvc8STeiZw0wGAxFDTespAoyJvWV3eNM9JxLax0Tx2AwGHJz1cM9pdQS4Bv03f5pwC6lVLqj\ngxkMhn9ARobjf5zMVTspEXkaWA90A3oAG0TkEUcHMxgMBijY4d5DQD2l1HkAEfFCHwIucWQwg8Hw\nD3DDMamCnN37O6uDAlBKnSVHUmAwGAwOpSCV1FERmQ18aS3fBhxxXCTDv2HC9NnE7d2PzQbRQ56h\nUf2cq0W++uZb5i1+m9KlS3HHrbfQ8/77OJuezvO
jx5OSdoa//vyTfk8+RmTLvLMqOzDvtBnE/bBP\n5x06mEZh9XPybtjIvEVvUrpUKe64LYqeD3QlIyODUROm8MuBA5QqVYrRLzxHzeqhTssLMPGN94j7\n5Tds2Ih+/AEa1sp5/3XbdzP/g88oXaoknVs3p8ft+jLDn48cp//kV3nkzqjs5xzCdVpJ9UZ74x5D\nT3532HrOLRGRQ9ZEf/bP/UdE+rgqU0HZvms3h48eY8XieYx/8XnGvzwre11GRgZjp85g4YwpLFsw\nm/XfbuaPk6dY/cnnVK9WlbfnzWTmpLGMnz7rCu9QyHl37uLwkaOseHMh40fGMH7qK7nzTpnGwlnT\nWPb6PNZv3MQfJ0+xbsNG0s6cYfkbCxk/IpopM2Y7LS/A9n0/c/j3UyyfMJxxfR9m/OLluTKPW7Sc\nBdEDeHvMUNbv2MMficmkn7/A+EXLadGwrlOzugsFqaQeUEpNcniSIoxS6gtXZygIW77fSVS7SABq\nVg8lJS2NM2fO4u3tRfLpFMr5eGerq1o0b8rm73fi7+eH+vU3AFJT0/D3873c7gs/7/YdRLVvl5M3\nNdUu72nKeXsT4K/Fny0imrF52/ckJidnV1tVb6zCid//4OLFi5Qo4Zxbcrb+8BMdI27SmatUIvVM\nOmfSz+HtWZbktDP4eGndOkCLhnXZsudH7mp7MwuiB/D6h2scH9ANK6mCdFL3isgqpVTK1Td1PSLy\nKHrO8fKAoNXmB4AJwF/AMeBxoHve7ZRSi6zdRItIJFrueQ96FtIGwBz0CYMDaLForFLqSRFpDMy1\n9p8BdAXKAUvRyqy5wP1KqYesjAuBj5VSHxXmZ09ITCKsbp3s5QA/P+ITk/D29iLA34+z6ec4dOQo\nIZUrsW1nLBFNbqL3Iz1Y9cnn3Hpvd1JT01jwyuTCjHT1vPVyqosAf3/iExOtvP6cTU/XeStVYtuO\nnUQ0bYLUrsWSZct55MEHOHz0GEePnyD5dApBgQHOyXw6hbAaOcKfgHI+xJ9OxduzLAHlfDh77gKH\nfj9JSPkgtu9VNA+rQ8kSJSjppE7UHSlIJ1UWOCQiCrsB8yI+j1RDoBVQGz398Q3ArUqpoyIyB20l\nzsxnu6xOao9SKtry5T2Evj4si6bAA8Ap4JilaQ8GBiilYkVkDPpSjY+BcLTC6jQwTURuQLdha6Cf\noz58Fpl2NwrYbDYmjYomeuxkfLy9qFK5EpDJ/32+lsoVK7Bo1sv89POvRI+bzKq3Fjo6Wv55M/Pk\nfWkE0S+Nt/JWhsxM2rVuya64PfTo1QepVYsa1UNxpTsybxtP7P8oL859C2/PsoRUCLrCKx1ExvVZ\nSY11eIrCZ4tS6qKIHAN8gfNKqaPWuvVAO2BXPtthtw3AdqAt2k6cxa9KqT8AROSE9bqTwGQR8QQq\nk+PmO6CUSrS2/QToDPwOfKuUKvQzpMFBgSQkJmUvn4pPoHxQjiU3oslNvLNwDgDT5i4gpFIltu/a\nTZsWzQGoW6cWpxISnHb4FFw+iITExJy8CXnyNm3CO4vm67yzXyWkciUABvd9KnubqLu7EBjg7/Cs\nWQT7+5FwOkd0fSophWD/nP86EWF1WDpuGADTl612jKX4OqMgA+cl8vnJFJHKjgz2L/nb7nEAWiCR\nRWn0IVne7ey3ybzM47yvyXrdTGCmUqodsMBunX1H9Bb6MPBu4J0rhf+ntG7RnDVffwPAvp8UweWD\n8PbyzF7/5MBhJCYlk37uHOu/3UzLiKZUuzGEuL0/AnD89z/wKlvWaeM7rVtEsGad/j7Y96MiOCgI\nby+vnLwDBpOYlGTl3UTLiOb89PMvvPDSOAA2bt5C/bp18PC45vvk/3nmxvVZs3WXzvzbEYIDfPEq\ne0P2+t7jZpGYkkr6+Qus37GHVo2cey9+Zmamw3+cTUEqqRj04cnP6BuLBa0Kry4iE5VScx2YrzBI\nRneqVS1HXjv
gO6782SPRuvUWwI8FeI8g9BTLZdDV0ta8GyildotICPrQMPraPkLBaNKoIWF169Dt\niT7YPDwYNWwwqz75HB8vL27t0Jb7/3cnjw8Ygs1mo/ejPQjw8+OBe+4meuxkej41gL8vXmT08CGO\niJZ/3saNCKtbl26P9cJm82DU8KGs+uhTfLy9uPWW9tx/z395vN8gbDbo/ejDBPj74edbjsyMTLo8\n/DhlSpfh5XGjnZYXILxuTcJqVKV79GQ8PGyMeLI7q9dvxtuzLLfeHE6XqDY8OXYmNmz0vuc/+Jfz\nZt+Bw0xespLj8YmULFGCNVt2MWvY0/j5eF39DQ0F6qSOAM8opfYBiEh9YABwK/qevqLeSQH0At4R\nkb/Rg97LgZ5X2D7M7pKD0cC9V9n/bOBDa9+z0QPsK/LZbi3go5Ry2NfR0P5P51quW6dW9uNOHdrR\nqUO7XOu9PD2ZOfElR8W5KkOf6ZtruW6d2tmPO93Snk63tM+13sPDg0kvjcCVDOmZ+79D3dAbsx93\natGETi2a5FofVrMab41xUud/nY5J1crqoACUUvtFpL5S6ryIFLkpW5RSb9o9PgOEWot5LcP5bqeU\nCuVS3rR73MzudVmPX7N+slidd1sRsQHtgdy9iMFguCIF6aTSrbNcG9BjOa2A0iJyG/r0uuEqiEgo\n+vDxPaXUry6OY3BnrtPrpLoDg4Gn0APtPwFdAC/06XnDVVBKHUJfumAwGK6RgsxxniQi04A66Erq\nZ6VU6lVeZjAYXECmG45JFWQ+qWeBX9Gn2ecCv4qIwy9ENBgMBijY4d6jQM2s22JExJ/ic1bPYLi+\ncMMxqYJcBXfc/r49pVQy+lS7wWAwOJzLVlIi8rj18IiIfISejTMDuAU9dYvBYChquOGY1NWuus4i\nEX2zLEAK+syewWAwOJzLdlJKqcecGcRQzMnIIDM1wdUpCk4JrbD3qFzrKhsWL1w5I4SjKIgc9CiX\n3mSLUqpqPpsbDAZDoVKQs3v2t5OUBjoCnpfZ1mAwuJLrbEwKAKXU4TxP/SIia4DpjolkMBgMORTk\ncO+WPE9VBWo6Jo7BYPg3XJdjUsAI9JiUDX0JQjr6Pj6DwWBwOAW5mPMt9B38twJl0AICcWQog8Hw\nD8nMdPyPkymod28h2pjyA3repfsdmMlgMBiyKUgndc6SBnQG3ldKZZDPJQkGg6EIkJHp+B8nU5Ax\nKURkLnqe814i0hKtiDIUQYqbZn3i3IXs3q+w2WzE9O9FQztv4LrvtjJv6QpKlypF51va0vOeO9m2\n+wcGjZ5ErVB9mV6dGqGMeMa5Q6QTZr5qtbGN6EH9aFQ/xx341cZNzHtzmW7jqA707PI/3v/4Mz76\n4qvsbfb+pIhd96lTMxdnCtJJ9UB75mZZ+qdQnDAFroi0B/orpboUYNsuSqmVjs5UGFzL57pW7DXr\nBw4eInrsZFYsngfkaNZXv70IP99y9Bo4jKh2kXz1zbdUr1aVIf2e4mR8Ao/0HcQX7y8t7Gj54tVk\nvgAAIABJREFU5939A4eOnWDF3Jc5cPgo0VNmsmLuyzl5Zy1g1Wsz8CvnQ6/nRxPVpgUAzRs3YNZL\nLzgl4yWZY+N0Gy+cw4FDh4keP5UVliYsIyODsdNns/qN+bqNn32BqLat6XpXZ7re1Tn79Z+v2+Cw\nfO54du+qh3tKqd+VUjOUUspaflcpFef4aNfEcFcHKApcTrMO5NKse3h45NKsn07Rcxg6W7O+dVdc\ndsdTs9qNpKad4czZdJ03JRUfby8C/Hzx8PCgZZPGbN6522nZLseWHbuIatsagJqh1UhJO8OZs3Zt\n7G3Xxs3C2fz9zlyvn7v4bfo+Zia0vRYKdLjnQrxFZCn6jOL7wEq0iSUTbRV+FG2CaWyp4O8VkfHo\nm6NLAHOUUu+KyJtoB14g0A0tTaiBPls5Uim1VkSeR1thMtAK9AmWaj1Lz37Ue
q8HgJuVUv1FpCf6\nivzl2FVHIpKglAoSkSi0XPVPtFrLoScciptmPT7pNGF2NpsAP1/ik5Lx9vIkwM9X5z12gpCKwWzb\nvYeImxoSUrECBw4fpU/MWFJS0+j3SHdaNwu/wrsULgmJyYSJXRv7++o29spq43QOHT1GSKWKbNu1\nm4jwxtnb7tn/E5UqlKe8I5Xw1+MV5y6mPlAXXfEdRDvznlJK/SIifYF+SqnxIvK81UFFAtWUUm0t\nB94uEfnQ2leSUqq3iDyMNhq3swSnG9BTIw8FKqHdglmHs7OAjtYUylOArkqpt0XkYRFpAgwCooCb\nLpPfH3hQKXVQRN4CbiO3st2hFHvN+vBBxEyZibeXJ1UqViAzM5PQkMr0e7g7t3dow9ETf/DIs9Gs\nWfoapUuVclHmnMc2m41JI54nesJUfLy8qFKpUq5TTCs//ox7Ot/m/JDFnKLeSe1SSqVDthIqAlgo\nIqCroO/zbN8KaCEiG6xlD3THA1qZDloztQFAKXVCRC6ISAC6SvsKbRdeJiIVgNrAKuv9vICs2/yf\nATYCzyqlTlvr8yMeeF1ESqIrt69xYCdV7DTrQQHEJyXn5E1MonxgjjI94qaGLJulK7tpC5cQUrEC\nFcoH0vkWfUhbNaQSQQH+nEpIpEqlig7PqzMHkpBk18YJiZQPtGvj8Ma8M2+mzjzvdUIqVchety02\njhefHeDYgNfjmJSLyas0Twc6KKXaK6VaKqWeybP+T2CRtb69UqqeUuo3u3WQc/V8FqWBDKVUH3QF\nVZEcfddxu301V0pNsV4TiO5sqtjt056sr/XF6MPAdsD/XcPn/kcUO816s3DWbtys8/78K8GBAXh7\n5uTt9fwoEpNPk37uPBs2b6dV08Z8/OUGFq1YBUB8UjKJyacJtuuIHZ755masWb9RZ1Y/ExwUmLuN\nnx2e08bfbaFlcy0JOhmfgFfZsi6r+IozRb2Syksc8B/gcxHpBsQrpdaR09luA14WkcnozmeqUirv\nV9f3QAdguYjciO6MMkVkpFJqDDBGRNqiD/uwRKj7RWQAem73/cAUoC2wUkTeBVKxKjYRaQT4WO/l\ni57Z1M96zz2F3B65KHaa9Qb1CKtTk279h+FhszFyYB9WffGVzhvZkq533MYTw0bqvA92xd/Xlw6t\nIxg67mW+3rSNv/76m1GD+jr1D79JwzDCpA7deg/QbTzkGVZ9+gU+3t7c2q4N9999B48Pfl5r1h/u\nToB1IiI+MYkAfz+H53NHW4ytqJ6yzHuqXkQS0APir6E7lnPo8Z4kEVmH1pdHWAPnUehq6VWl1JvW\nwPlKpdQn1qHXfPRN0qWBF5RSG0VkNnAzWni6WSn1ooi0Aaahq7ATwMPoQ72SSqmJ1s3XzwJ3A18A\n3sAm4D6lVA0RGWOt+xn4FK1sj7bWX/EShNCqN2Ye+mHHv2xF5xDasBlkZnJw29eujlJgqrfoCMCh\n2C0uTnKNBFaxXWl1epc2Dv+D9lz53RUzFDZFtpO63jGdlGNx207qvtaO76Q+2OTUTqqoj0kZDIbr\nnOI2JmUwGK5AZoarExQ+ppIyGAxFGlNJGQzuhBuOMZtOymBwJ9zwEgRzuGcwGIo0ppIyGNwId7yk\nyFRSBoOhSGMqqaKKzQa2YvIdYrOBzYbNt7yrkxSci38BkHHiVxcHuTY8AqtceQMzJmUwGAzOxVRS\nBoM7YcakDAaDwbmYSspgcCPccaoWU0kZDIYijamkDAZ3ogiMSYnIK0AL9Iy1A5VS31vPhwDL7Dat\nAQxXSr1zpf2ZTspgMBQaItIOqK2Uaiki9dBTaLcEUEodB9pb25VET9P90dX2aQ73DAY3IjMj0+E/\nV6Ej8CGAUupHwF9EyuWz3aPAB0qpM1fboamk3IwJ02cRt3cfNmxEDxlIo7A8mvVFS7QCvFMUPe+/\nj4yMDEZNnMovBw5SqlTJ/2fvvsOjKvO/j
78nFZJJhYQSSgDhBkIRpEgTpLnys+0KioCCFAtgAWSl\ni0IUkCJFAUFApdp4Vt11YUWQ3jvIjQQDoacS0mjJ88eZJJMQAtFMyfB97ZXLmTlTPmHJzfeczJwP\n40cOp0Z4VfvlnfoRBw4dNirLhw+hQUTd3LwbNjJ34WJL3k706t7NyBs5md+jThp5R71NjWrhdssL\n8MHirzjw+0njz7jvs9S/L/f11+3cz7xv/4OXpwddWjWl56MPA3D89FkGT/6E3o91zLnNRZUHrBtR\nYy23Jee7X3+g8908oUxSf4FSqo9Sauptti1RSj1WwO3FXq+ebeeefZaa9flEjh1B5LSPcrZlZmYy\nYcoMFsz8kGWffsz6TVu4cPES637dxJWUVFYumkfk2JFMmfmxreIVkHcvp07HsOrzhUSOG0XklOl5\n806eyoLZ01m2cB7rN2428m7YyJWUFFYuWUDkuNFMmTHbbnkBdh45zqnzl1j5/ggmDnyByEUr82Se\n+NlK5o96jS/fe4v1uw9yIT6RtIyrRH62kgfr17Z9wKws238VzS2nGlZKtQCOaa3zL1wFkkXK/mxW\nCX9LzXpyQTXrQbk16zt3Ex1zJmfaqlIpjHPnL3Dz5k1bRcybd+duOj78kJG3erV8tfBJ+Pv55eZt\n1oStO3YSfTqGBvWMaatK5Uqcu2C/vADbDx2jQzOjC7ZGpQokp6SRkpZuZL6Sgp9vaYID/IzM9Wuz\n7eBveHl6MH/Ua4TaoS3GCZzDmJyyVQTO57vPYxgdl3dFFqlioJSapZTarZT6XCm1XSkVbtn0uFLq\nZ6XUAaVUY6XUcCyV8LbIERcfT5DVD0JwUCCx8fE5l1PT0og+HcP1GzfYsXsvcQkJ1KpRnc3bd3Lz\n5k1ORp8m5uw5EpMu2yLerXnj4gkKyi0DNWrhs/MGkZqaSvTp01y/foMdu/cYeWvWYPO2HZa8p4g5\nc5bEpCS75AWIS7pMsL85N7O/H7FJyTmXU9OvEn3+Itdv3GTnYU1cUjIe7u6U8vayT8DMLNt/FW4t\nkN3w1Bg4p7XOX4jbFKOe7q7IMam/rprlqwkQAeyz2palte5o2e0brbV+OrsS3h7Bbqktf2c0oyZ8\ngJ/ZbNSsZ2XRtlUL9h48RM+XBqNq1qB6taoOO93HLbXw741j1PhII29YRciCtq1asnf/QXr2fwVV\n8z6qVwt36G/d82f+YHAfxnz8BWaf0oSVK+u4YA6itd6qlNqjlNqKUT03SCnVB7istV5tuVsF4NLd\nPqcsUn9dI+C/WutM4JBSKtpq23rLf3cCk2wdJDSkLHGWSQSya9Zzf1CaPdCI5Qs+AWDanHmEVTQa\n6Ie8+lLOfTo+9QxlgnOnG5vnjcuf16qy/IHGLF8038g7+5PcvINeyc37xNN2ywsQGhRIXFLuoZRL\nCZcJDQrIud4sohZLJw4HYPqy1VQMsV+7MjjH+aS01vkPaRzIt71+UZ5Pdvf+OhPGvxjZsu7isk20\nat6MNes2ALepWX99mFXN+hZaNGvCseO/M/K99wHYuHU7dWvXws3NPn8tWrVozpp1Rlffkd+OWfL6\n5uYd/CbxCQlG3o2badGsqZF3/EQj75Zt1K2t7JYXoFXDuqzZvtfIfPI0ocEB+JYulbP9pYmziL+c\nTFrGVdbvPkjLBnVu91TiLskk9ddFAQ8opUxAbcD69/dtgK8w3n37m+U2m/1ENW5Yn4jaiu59X8Hk\nZuKdfw7lux/+g5/Zl04Pt+WZp56g7+Ahlpr15wkODCTQ35+szCy69h6At7cXU98bZ6t4BeRtQESd\n2nTvM8DIO2I4333/o1FZ3r4dz/z9SfoOfMPI++ILBAcFEhjgT1ZWJl2f74u3lxdTI9+1W16ARrVr\nEFG9Cs+Nmoybm4mx/Z9j9fqtmH1K06l5I7p2bE3/CTONmvW//40gfzNHok4x+fNvOBsbj4e7O2u2\n7WXW8
FcI9PO98wsWlRNMUsVNGoz/Asu+dj0gBKiDcTyqOfA4MAG4AoQDlYHntdaHrCvhC3vu8KpV\nsqIP7SnsLk4jvP4DAEQfvetjoQ4XXsuYcE7+d+Ud7ulc3Oq3K7Q9+HK7hjb/gQ7YcMCuDcYySf0F\nWuslSilv4FmtdW+llC9wDDivte5zm8d0sGdGcY+RsyCI/LTWV4GmSqndGAfKx2qtbzg4lhAuQyap\nYqC1fs3RGYQA5/jtXnGTSUoI4dRkkhLChWRl3vk+JY1MUkIIpyaTlBAuRI5JCSGEnckkJYQLccFB\nShYpp1ZSatazZdrvvE5/meXP1uRnvw8niz9HFikhXIgckxJCCDuTSUoIF+KCg5RMUkII5yaTlBAu\nRI5JCSGEnckkJYQLccFBSiYpIYRzk0nKxbw/bSYHDltqy4e9eWtt+aIleHl6GTXrz3a11KxPMWrL\nPTwZP2o4NcLD7Zz3iFVeq1r4DZus8nawyvthvrz2q4UH+GDeEvYf+x0TJka/2of66r6cbeu27mLu\niu+MmvW2rej15N9Iz7jKyKkfE5d0mWvXrvNqj6d5+MEHbJIt0wVHKVmkbEgp9TegmtZ6rj1ez6hZ\nj2HV4gVE/RHNqPciWbV4AWCpLf9wOquXLiYwIIABrw+lY7uHOHT0N0vN+qecPnOGyKkfMf+jApvj\nbZT3DKsWf2rJ+z6rFn+aL+8iS95hVnlTWLloviXvTOZ/9KFd8gLsPHiU6LMXWPVRJFGnzzBq+lxW\nfRSZm/njRXz38WQC/c0MGPMBHVs2Ze8RTb1aNej/zJOcvRhL35ETbbZIuSJZpGxIa/1fe77etl27\n6djOUlteLbdm3Wz2NWrLzUbNOsCDTZuwdecu4hMSrWrWK+XUrLu7u9sp76218Ebey7fJm5QzHdo7\nL8D2fYfo2LKpkblKJZKvpJKSmobZ14fE5Cv4mX0IDvQHoMX99di67xD/6Nwu5/EXYuMpXzbYZvlc\ncJCSY1LFSSnlqZRarpTaopRap5QarZSaarl9lVJqo1Jqh2XCKnZx8fEEBVrXrAflrS23rlnfs5e4\n+ARq3ZevttyeNevxCQQFWtWsF1YLv2cvcfGJ1LqvusPyAsQmJhEc4J+bOcCf2MSknMup6RlEnz1v\nZD5whPjE3Ar47m+O4a1JMxn1Sh+75XUFMkkVr97ABa11D6VUdyDI8lUfKKu1fkgpFQh0sUeYW2rW\nx49l1Hvv42f2NWrWsdSsHzhIzwEDrWrLHVSzbvWyRt4xlrzmfHkP0XPAIIfXwkMB1fBvDWL0tLmY\nfX2oVD40TyPsyo8m8ltUNMOnzOZfcz/EZCr+ZqgsF2yLkUWqeDUG1gForVdaevnAqLnyU0p9CawG\nbFL2Flq2LHHxCTnXL8Xlry1vxPKFxuGxaXPmElbBUls+8OWc+3R8sqv9atbL5quFv+u8VrXwT3az\nb816maCcyQngUnwiIVav36xBXZZNfw+AaYuWE1YuhMO/n6RMgD8VQstSp0Y4N2/eJOFyMmUCA255\nfnEr2d0rXjcp4M9Ua52G0WI8H2OKWmiLF2/1YHPWrFsPWGrWy+arLX99aN7a8uaW2vJ3jQO/Rs26\n/WrLWz3Y7A55rWrhN26hRXNLLfy71rXwdq5Zb9yQtZu2G5l/P0lomSDMPqVztg8Y/T7xSZdJy8hg\nw/Y9tGxUn92HjrL42x8BiEtMIi09gyB/P5vky8qy/Ze9ySRVvHYB7YGvlVKPARUBlFKNgbpa66VK\nqR3AJlu8eOOG9Ymoo+je9yVMJjfeeXsY3/3wb6O23LpmHYza8uya9awsur7Qz6hZnzDeFtEKyVub\n7n1ftuQdmi/v4/Qd/KZRWf6iVS18ViZdX+hvyfuO3fICNI5QRNSsTvc3x+DmZmLcoH58t3YDfr4+\ndGrVjG6PdqDfyIlGNXz3pwgK8Kf7/3Vm9PS59Bw6joxr1xg7uJ9dF9a
STmrWi5FSygtjSqoKXMco\nCw0CIoEVgC/GtDVba/1tYc8VXrVKVvThfbYNXEzC6zUCIPrwXgcnuXvhdRoA8MfmnxycpGhM4Q0L\nPZAV07C2zX+gKx84JjXrJZXW+hrwwm022+Q3ekK4OlmkhHAhrrhnJIuUEC7EBdco+e2eEMK5ySQl\nhAtxxQ8YyyQlhHBqMkkJ4UJccJCSSUoI4dxkkhLChchbEIS4naxMsjJSHZ3i7mV/LMVHPuTr7GSR\nEsKFuOAgJcekhBDOTSYpIVyIKx6TkklKCOHUZJISwoVkZTo6QfGTSUoI4dRkkhLChcgxKSGEsDOZ\npFxMSatZ/2DWXPYfOYbJZGL0G69Sv47K2bZu01bmfr4cL09PunRsR6+nnwTgh7XrWLjsa9zd3Xi9\nf2/atWxut7xG5nnsP3oMkwlGv15A5i9WGJk7tLXK/AsLl3+Fu7s7r/d7wWaZXXCQurcmKaVUH6XU\n1Hy3rVRKlb7dY2yUI84Wz2tdsx45dhSRU2fkbMuuLV8wcxrLFnzC+k2buXDxEut+3ZRTsx45biRT\nPppji2gF5913kOgz51g1fyaRI4Yy8aNP8uadMYdPP5zI0o+nsX7Ldi5ciiXxcjJzFi1l2dzpzJsy\ngXWbttotb27ms6ya9xGRbw9l4sy5eTN/9DGfTpnA0jlTWb9lR27mxUtZ9sl05k1+j3Wbt9k1c0l3\nz09SWuvujs5QXEpazfr2Pfvo2KalkTe8CslXrpCSmorZ15fEy5fxM5sJDjIamVs80Iitu/dRytuL\nlk0aYfbxwezjw4S3h9g8591nTs6X+f6CM//zTZvlc8XzSd2Li1Q1pdR/gMrADGAcUA9oCUwE0oGL\nQE8gBPgM8MJoeekPeANLgRZANWCV5fJ5rXVZAKXUN8Ac4ATwpeV1PYHeWusoW31jcfHxRNTO3fXI\nrlk3m33z1KyHVazAjj17ada4EarmfXy+fCW9n3uWUzFncmrLy5YJtlXMHLHxCUSomrl5AwOJjU/E\n7OtLcGAgqWnpRMecJaxCOXbs3U+zRg0BSL96lVffHkfylRQG932eFk0a2TxrTuaExHyZA6wyB+TN\nvO8AzRoZrTTpGVd5dcQ7JF+5wuAX7Zu5pLsXF6laGE3D/sABjMUHYDAwTGu9SSn1D6AMMAGYprX+\nWSnVBRirtR6glPoJ6As8Aryhtb6ulLrlhYAKwHta6/VKqb7AQGCYLb85ayWvZj1f3tHDGf3BNMxm\nXypVKJ+zPelyMnPeH8+5ixfp/dpwfvl2qU0qy+8uc+5lk8nEpFFvMXrSNMy+2ZmNbUnJycyJfMfI\n/Po/+eWbL21Ts+56g9Q9uUht1lpfB+KVUslAFcvtXwPzlFLLgBVa6wtKqZaAUkqNAdyBWMt9PwC2\nAAe01lsKea0LwCyl1LsY/Xt7bPD95Ch5NetliM2TN56QsrkTXLNGDVj2yXQj77zPCKtQjoyr12hU\nvy4eHu5UCauIr48PCUlJlAmyY+aExMIzf5ydeRFh5cuRcfUqjepZZy5NQtJlylh2C0Xh7qkD5xb5\n/63JAtBafwk8DMQBPyilagPXgG5a63Za6zZa639YHuMDmIByt3kNT8t/3wPWaK0fAt4txu+hQCWu\nZr3ZA6zdYJQ5H9G/E1q2DGYfn5ztA4aNIj7RqFnfsGU7LZs0pnWzB9ixZz+ZmZkkXk4mLT2doAD7\nnW6lVdPGhWd+azTxlir1DVu307JJIyPzXuvMGQQF+NskX1ZWls2/7O1enKRaKKXcgWCMRuEEAKXU\nWGCO1vpTpVQoUBfYATwFzFVKtQfKa62XY0xS7wCPKqWe1VqvArKUUtl/W7MPOJQFopRSJuBJjGnM\nZkpczXr9CCJULbq/8iZuJhPjhg7mu/+sNSrL27am2xNd6DdkpFFZ/nx3ggKNxahzuzY8+/IbAIwZ\nMsiuleVG5pp0f/VN3ExujBs6yMh
s9qXTQ63o9vij9BtqydwrX+ZXLJnfHCg160VwT9WsK6X6YBxH\n8gbuA6ZgHCyvBzwNvA4kWr56A4HAYqA0xsTVBwgFxmutH1VKlQF+xTjo/hbwd+AoxoH2mYAZmApE\nA7OBT4EXgeXZB9lvp8TVrGdl8seewvZ8nUu1B1oB8MfuzQ5OUjSm0PBCD2QdCq9m8x/o+tF/2PUA\n4D21SJUkskjZlixSf569F6l7cXdPCJflikOH7BgLIZyaTFJCuBAXHKRkkhJCODeZpIRwITJJCSGE\nnckkJYQLycp0vVFKJikhhFOTSUoIF+KCg5QsUqKYmNww+djvg75/2Y0bAGSdOe7gIEVjCg13dAS7\nk0VKCBci7zgXQgg7k0lKCBfienOUTFJCCCcnk5QQLkQmKSGEsDOZpIRwIfLbPSGEsDOZpFzM+9Nm\ncuDwYUwmE6OGvUmDiLo5237esJG5i5bg5enF/3XuSK9nu5KZmck7H0zh96iTeHp4Mn7UcGqEhzsk\n+/ETUQwcOpw+PZ6jV/duebZt3bGT6XPm4u7mxkOtWzJoQD+HZAT44ItvOXAiGhMmRvV+mvo1quZs\nW7f7IPNWr8HL04MuLRrT85G2AByPOcfgqQvo3aVdzm224HpzlExSd00p1UcpNfU225YopR4r4Pau\ntk+Wa+eefZyKiWHV4gVEjh1F5NQZOdsyMzOZ8OF0FsycxrIFn7B+02YuXLzEul83cSUllZWLPiVy\n3EimfDTHnpFzpKWnM2HKVFo0bVLg9olTpjH7w0msWLyALdt2cOLkSTsnNOw8+junLsSy8r1hTHy5\nB5Gff5OzLTMzk4mLv2b+26/w5bg3WL/3MBfiE0nLuErkkm94sF4th2Qu6WSRsq0R9nyxbbt207Hd\nQwDUqBbO5eQrpKSkApCYlIS/2UxwUBBubm482LQJW3fuIvp0DA0i6gBQpVIlzp2/wM2bN2/3Ejbj\n5enJglkzCA0JuWVbzJmzBAT4U6F8Odzc3GjbuiXbdu62e0aA7UeO06GJUZ1eI6w8yalppKSlA5B4\nJRU/39IE+/sZf8YRim2HNV6eHsx/+xVCg2z/saFMO3zZm+zuFZFSahZGhdURQAHdLZseV0q9CYRg\n1FZ1ABoqpb4DngFWAJWAncCzWuvyxZ0tLj6eiNq5de/BQUHExsdjNvsSHBREaloa0adjCKtYgR17\n9tKscSNUzfv4fPlKej/3LKdizhBz9hyJSZcpWya4kFcqfh4eHnh4FPzXMTY+nmCrhuLg4GBiYs7Y\nK1oecUnJRFSrnJvFz0zs5SuYfUoT7G8mNf0q0ecvERZShp1Hj9O0bk083N3xcLdp5aJLk0WqaKpZ\nvpoAEYB151SW1rqjZbdvtNb6aaXU21rrfyil/g8waa1bKKVaYPT72Zz1b3pMJhOTxo9l1Hvv42f2\npVLFCkAWbVu1YO+Bg/QcMBBV8z6qVwt3/t8QOVE+6yQmk4kPXu3FmPnLMPuUJiykjN2zOtEfTbGR\nRapoGgH/1VpnAoeUUtFW29Zb/rsTmJTvcXWA7QBa621KqQxbhAstW5a4+ISc65fi4ggpWybnerMH\nGrF84VwAps2ZS1iFCgAMGfhyzn06PtmVMsG5U4szCA0pS1xcfM71i5diC9wttEuWoADikpJzrl9K\nvExoYG5lerO6NVk6fggA01d8T8WQMrc8hygaOSZVNCby7pZn3cXl7MdZ32aTgz6tHmzOmnXGWnnk\nmCa0bFnMvr452/u/PpT4hATS0tNZv3EzLZo35djx3xn5biQAG7dup25t5XQV4JUqViQlNZUz585x\n48YN1m/aTKsWzR2SpVWD2qzZsR+AI3/EEBoUgG/pUjnbX5r0CfGXr5CWcZX1ew/Tsp663VPZRJYd\n/mdvMkkVTRTwgFLKBNQGqlptawN8BTwI/Ga5Lfun/RjwPIBS6kHAFxto3LA+EXUU3fu+hMnkxjtv\
nD+O7H/6Nn9lMp4fb8sxTT9B38BBMwEsvvkBwYCCB/v5kZWXR9YV+eHt7MXXCeFtEu6PDR39j8oxZ\nnD13Hg8Pd9as+4X2bdtQqWJFOrVvx/iRbzNs5FgAunTuSLWqVRySs1Gt6kRUr8xz46bj5mZi7Ivd\nWP3rdsw+penUtCFd27ek/wcfY8LES092IsjfzJGTp5m8dDVnYxOM723HfmYN7U+gufj/Grjg3p7U\nrN8tpVQfoB7GgfE6GMejmgOPAxOAK0A4UBl4Xmt9SCm1DvADWgFfA6HALqCn1rpsYa9X4mrWgeij\nBx2c5O6F16wNwMkfv3BwkqJxa9y50IrzTeUq2fwHus3FM1Kz7oy01kuUUt4Yv5nrrZTyxZiQzmut\n+9zmMR2srj6VfUEp1dOmYcU9yxVHDuc6+ODktNZXgaZKqd0YB8rHaq1vODiWEC5NJqki0lq/VgzP\nUeiunhB/lisWMcgkJYRwajJJCeFCHPEWAVuTSUoI4dRkkhLChTjDHKWUmoHxfsEs4A2t9S6rbZUx\nPsfqBezVWr9yp+eTSUoIUWyUUm2BmlrrFkA/YFa+u0wDpmmtmwE3lVJ3fFeuLFJCuJCsLNt/3UEH\n4P8BaK1/A4KUUv4ASik3jE9mfG/ZPkhrffpOTyi7e6L4mOz6RuS/5qpxDqisrWsdHKSIGnd2dII7\nKQ/ssboea7ktGePTGleAGUqpxsAmrfXIOz2hTFJCuJAsO3wVkSnf5TBgJtAWaGQ5jVEI4pj0AAAg\nAElEQVShZJESQhSncxiTU7aKwHnL5TjglNY6Smt9E1iHcV62QskiJYQLySTL5l93sBboCmDZpTun\ntb4CYPkI2UmlVE3LfR8A9J2eUI5JCSGKjdZ6q1Jqj1JqK8a51wZZziByWWu9GngTWGI5iH4I+OFO\nzymLlBAuxBneJ6W1zl9AcsBq2wmgdVGeT3b3hBBOTSYpIVyIK57DUiYpIYRTk0nKxZT4mvUhb9Gn\n53P06v5Mnm1bt+9k+pxPLDXrrRj0kuNq1idtOsyBC0mYgJEP1aN+ucCcbcsP/sEP+izuJogIDWTk\nQ/UA2HU2jiE/7WFih/tpV62czbK54CAlk1RhlFIblFL18t1WTym14S4fH245i6ddlPia9clTadGs\naYHbJ06Zxuypk1mxZCFbtm/nRJRjatZ3nY3jVFIqK7q1ZkKHhry/8XDOtpRr11m0N4ovn27J0q6t\niUq4woELiZy+nMqSfSdpVMG+hauuQhYpF1Lia9ZnzyA05NaTlt5Ss96qFdt27irgWWxve0wcHaob\n71WsEexHcsY1Uq5dB8DTzQ1PdzfSrt/kRmYmGTduEuDtSYiPN7O6NMXPy/Y7LlJp5cKUUu7Ap0B1\nwBMYZ7WtEkbby1Wsfp2qlPoHMAy4AezWWg+zvCfkUYx32o6w3O9RoIfWOrvWagHwg9b6++L8Hly2\nZj0unuCg3F2q4OAgYs6ctVe0POLSrlI3NDdLUGlv4lKvYvbyxNvDnYHNatH583WU8nDn0ZoVCQ8y\nOySnK5FFKlcPjOaXfkqpssAvQHYd8OvASq31TKXU20BDpZQZGAO00FpfVUp9pZRqZbl/FaAlub18\na4GZSqlSwDWMiqtBtv6GpGbdHnKzpFy7zqe7f+en5x/G18uTvqu3ciz2MrVDAuyWxhXPcS6LVK6W\nQBulVPYbzUpjnJgLoC7GJAWwAWNSisBYjNYopQACyF2Udmmtsyy3o7W+qZT6EeiC8TmmTVrra8X9\nDbhszXpovpr12NgCdwvtIcS3FHFpGTnXL6VeJcTXaDCOSkihsr8vQaW9AWhcsQxH7LxIuSI5JpXr\nGhCptW5n+appuQ3y1qu7Wd1/j9X9G2mtl1tty+8LoBvwBLC8gO1/2T1Ts77RgTXrVUJYe8L4vOzR\nS0mE+nrjaznWFObvQ1TiFTJuGMf0jlxKomqATcqqb8sJz4Lwl
8kklWsH8CSwQikVivEZo2waaIJx\nnpyHrW6ro5QK1VpfUkq9i3FMq0Ba6/1KqTCMFuNRtvgGSnzN+vSZlpp1D9b8bKlZD6tIp/YPM37U\n2wwbMQaALo90olrVqnd4RttoVCGYiJBAeny9GTcTjGlbn9W/xeDn5UHHGhXo27gGfb7bioebifsr\nBNMkrAy//nGRRfuiOJmYwpHYyyw9cJKFT7VwSP6SSGrWLZRSHsA8jF07d2A88DYwGONEXV8BScBB\noKnWup3lwPkojAPq+4DXgN5APa31W0qpcOAbrXUTy2uMAfy01m/fKU+JrFn/7ZCDk9y98MrGWWuj\n3h3g4CRF4z54aqFnFvy+TAWb/0A/EX9eatYdwXIaif75bv7J6vIt+xda6++A7/LdvMRqezTGBIZS\nygS0A+544nkhRC7nOvjgoiwT1W7gf5ZPgQthE/I+KfGnWCaqBxydQ4iSSBYpIVyIKx5ilt09IYRT\nk0lKCBeSeee7lDgySQkhnJpMUkK4EBc8JCWTlBDCuckk5cyc7DN0hcoCrl91dIq75+MHgFu3gQ4O\nUrxc8RMkJeinQAhxL5JJSggX4npzlExSQggnJ5OUEC5EJikhhLAzmaSEcCEySQkhhJ3JJCWEC8l0\nwfdJySLlYt6f+hEHDllq1ocPubVmfeFivLw8+b/OnejVvZtRsx452ahZ9/Rg/Ki3qVEt3H55p8/i\nwOGjllr412lQt05u3l83MXfRF0beTh3o9czTRt5JU/k96g8j74i3qBFu3/OdfzB7PvuPHsOEidGv\nv0z9Orldh+s2bWPulyvw8vSkS/u29Hr6CQB+WPsLC1d8g7u7O6/3e552LZrZNXNJJosUoJTyBDYD\nx7TWvYvpOcOxOr+5Pezcs5dTp2NY9flCok7+wah3I1n1+ULAUrM+eSqrl39OYEAAAwYPoePDbTl0\n5ChXUlJYuWQBp2POEPnhDObPmmafvHv3cSrmDKsWzSPqj2hGTZjEqkXzcvN++BGrv1xo5H3jLTq2\nbcOho8eMWvjP5nL6zFkip81k/owpdskLsHP/QaLPnGPV3BlERZ9m1OQZrJo7IzfzR5/w3WezCfT3\nZ8DwsXRs0wJvb2/mLFnOtwtnkZaWwezFX9pskXK9OUoWqWwVAO/iWqAcZdvO3XR82FKzXr0al68Y\nNetms69Rs+7nR3CQ0an3YLMmbN2xk/iERBrUM6atKpUrce6CUbPu7u5u+7y79tCxbRsjb7XwfHkv\n4+9nzs3b9AG27tpt5M2phQ/j3IWLdssLsH3Pfjq2MZpeaoRXIflKCimpqZh9fUm8nIyf2UxwoNFw\n3OKB+9m6Zz+lvLxo2eR+zD4+mH18mDD8DbtkdRWySBlmADWUUosBPyAI48/mNa31QaVUFLAA6Aqc\nwKi26gb8rrXuqZRqCHwMXMc4pU836ydXSrUB3rdsjwEG2KIcNC4unog6tXOuBwcG5q1ZT00l+vRp\nwipUZMfuPTRr0tioWV+2kt49uhs162fOkpiURNkyZQp5pWLKG5+QtxY+T97AfLXw+yy18DX4fMVX\n9O7ejVNnztq9Fj42IZGIWjWtMgcQm5CI2deX4MAAUtPTiI45S1iFcuzYd5Bm99cHID3jKq+OGE9y\nSgqDX+xJiwca2SSfK55PShYpwzDgG+AkRtX6QqVUXWAm0Amj4movMBk4DXyrtW6mlDqtlArE6NJ7\nTWu9Tyn1HtAT+MHq+WcBHbTWCUqpKRiL2DJbf1PWJ803mUxMem8co8ZH4mc2UymsImRB21Yt2bv/\nID37v2JVs27rZHeZ951RjJowKW8tfMsH2XvgED1fHoy6rwbVw6s69EO1t1TZjxrG6MkzMPv6UqlC\nuZw/y6TkZOZMHMe5ixfp/cYIfvn6c0ym4m+Gkpp119cSCFFK9bJc97HattNSnX4Ro2MP4BJGvfpF\nYLJSygeoiNUCpJQqB9QEv
rPUrvsCcbYIHxqSt478Umz+mvXGLF80H4Bpsz8hrKKlZn1QbstWxyee\ntlvN+i218LFxhJTNrU9v1rgRyxd8bOT9eB5hFcobeV/N7crr+Pdn7VoLH1qmDLEJiTnXL8UlEGI1\nxTW7vwHL5kwFYNr8xYRVCCXj6jUa1auLh4c7VcIq4utTmoSky5QJCrRb7pJM3ieV1zWMiSi7Ot36\n6OaN21w2YUxcM7XWbYH5BTznWavnbKq1tsmR3lYtmrNm3S8AHPntGKEh+WrWB7+Zt2a9maVmffxE\nADZu2WbXmvVWDzZlzS8bjLzHtCVv7r8L/d94i/iERCPvpq20aNaEY8dPMHLCB0bebTuoq2rZtRa+\nVdPGrP11s5FZnyC0bDBmn9zMA4aPJT4xibT0DDZs3UHLBxrRumljduw9QGZmJomXk0lLzyAowN8m\n+TLJsvmXvckkldcO4Clgm2V3729a6+l38biyQJRSyhvoAmzP3qC1TlRKoZSqq7U+qpR6DfhVa32w\nuMM3btiAiDq16d5nACY3E++MGM533/9o1Ky3b8czf3+SvgPfwGQyGTXrQYEEBviTlZVJ1+f74u3l\nxdTId4s71u3zNqhPRG1F936vGnmHD+W7H/+Dn6+ZTg8/xDNPPU7f14Yaefv0yq2Fz8yka5+XjLwT\nxtotL0Dj+nWJqHUf3V8dipubiXFDBvHdT//Dz9eHTg+1ottjf6PfsNGYTPBSz2cICgwAoHPb1jz7\nyhAAxrzxql0X1pJOatbJfbsA8DBGA3EoxnGo17XWu5VS0RjV6SlKqd1AV611dPZloDPwBhAFLAbm\nYCxWn2mtmyilWgPTMKaqc8ALWutCzxAXXrVKVvTRA8X9rdpEeN2GkAXRh3Y7OspdC6/XGIA/dm90\ncJKiMZWrXuiBrEX+ITb/ge6bHGvXmnVZpJyULFK2JYvUn2fvRUp294RwIa44csiOsRDCqckkJYQL\nccX3SckkJYRwajJJCeFCHPE+JluTSUoI4dRkkhLChcgxKSGEsDOZpETxMAHuJemvk2XkuJbh2BjF\nzBVP1SKTlBDCqZWkf/qEEHcgx6SEEMLOZJISwoXI+6SEEMLOZJISwoXIMSkhhLAzmaSEcCGu+D4p\nWaRcTImrWZ/2EQcOHcFkglFvFZD3syV4eXryf490pNezlrzvT+H3qCg8PT0ZP/Kfds0L8MEni9j/\nm8ZkMjF6YD/q187t4Vu3ZQdzl32Dl6cHXR5uQ6+nuuRsy7h6lcf7v8GrvZ7hH4+0t2vmkkx292xI\nKbVEKfWYvV7PumY9ctwoIqfkdkhk16wvmD2dZQvnsX7jZi5cvMS6DRtzatYjx41myozZ9oqbm9fy\n2pEfzsibd8o0FsyaxrKFc1m/cUvevIsXEDl2FFM+sl9egJ0HDhN99hyrZk8mctggJn68MG/mOQv4\n9P0xLJ0Ryfptu7gQm9teNnfZ1wT4+dk0X2aW7b/sTRYpF3K7mnUgT826m5tbTs169OmYAmvW7Za3\nXVsjb7VwLicn581rNufLu4vomDM501aVypU4d95+eQG27ztIx1bNjcxVK5OckkpKapqR+XIyfpYm\nYzc3N1o0bsDWvUYp0MnTZ4g6dYa2zR+wW1ZXIYvUXVJK9VFKLVZK/aCUOqmUek4p9b1S6oRSqrlS\narpSarNSardSqn++x7orpT5TSq233Mcms35cXDxBQblFmdm15UCemvXr12+wY/ce4hISqFWzBpu3\n7eDmzZucjD6VU7NuD3HxCQRZFWQGBwXlzWupWc+T9758eS016/YSm5BEsFVnXnCAP7GJRlmoUbOe\nTvSZc1y/cYMd+w8Rn2j8WU6et4QRr7xo83zSuydqAm2A/sBIoBHQB3gROKq1HqqUKo1RbbXQ6nE9\nMOrb+ymlygK/AA1sHbbE1aznryx/dyyj3o201KxXhKws2rZqwd4DB+k54FXUfdl5nahm/Z+
vM3rq\nHMy+PlQqX46srCz+39r13F9XUalCOYflLMlkkSqa3Zaq9fPAQa31TUvtujcQrJTaitGtF5LvcS2B\nNpb+PYDSSikvrfW14gxX4mrWQ8oSF2+VN66AvJ/NuzXvwJet8na1c816MLGJuZPmpfhEQoKtatYb\n1mPZR+8DMG3hl4SVD+XnzTuIOX+RDdt3cyEuHi9PD8qXLUPLBxoWez5X/O2e7O4Vze2q1sOB9kBb\nrXU7IH/x5zUg0qpqvWZxL1BQEmvWm7Fm3XpLXk1o2Xx5XxuSm3fTlty871rybt1G3dp2rllvcj9r\nN24zMv8eRWiZIMw+pXO2Dxj5Xm7N+vZdtGzckBlj3+KbTz5k1ZzJdH20I6/2esYmC5SrkkmqeDQB\nvtdaX1dKPQG4K6W8rLbvAJ4EViilQoE3tdajijtEiatZb9iAiNq16f7iAEwmN94Z8Rbfff9v/My+\nuXkHvWlUlvexypuZRdcX+uLt5c3UiePtlhegcURtImpWp/vrI3AzmRj3+kt8t+YXo2a99YN069KJ\nfiPexYSJl557miCr41f24IrvOJcG47uklOqDUbX+luVtBV211n0sl/sBYUA68P8wdu+SMaravwH+\nC8wD6lpuG6+1/qmw1ytxDcZA9JH9Dk5y98Lr1Afgj63/c3CSojFVrltoe3BkqWCb/0CPzkiQBmNn\npLVeYnX5R+DH/JetzOBW/Qu4TYhiJcekhBDCzmSSEsKFZLrg4RuZpIQQTk0mKSFciByTEkIIO5NJ\nSggX4orvk5JJSgjh1GSSEsKFuOIxKVmkRPFxc3d0grt33fjoZObxPQ4OUjTuleve+U4uRhYpIVyI\nK37MTY5JCSGcmkxSQrgQVzwmJZOUEMKpySQlhAuR90kJIYSdySQlhAuRY1JCCGFnMkm5sOMnohg4\n9J/06dGdXt275dm2dcdOps+Zh7ubGw+1bsmgAX0dlDLX8RNRDBzyFn16Pkev7s/k2bZ1+06mz/nE\nkrcVg17q56CUMOmbtRz44ywmTIzs1pn64RVzti3/dTc/7DyEu5uJiCoVGdmtM5eSrjBm6Q9cu36T\nm1mZjOjamYgqFWySTc4nJfJQSpmVUtFFfEw7pdTvSqlud773n5eWns6EKdNo0bRJgdsnTpnO7A8/\nYMXiT9mybQcnTv5hyzh3lJaezoTJU2nRrGmB2ydOmcbsqZNZsWQhW7Zv50TUSTsnNOw6fopTlxJY\nMfxFJvR6jPe/XpOzLSX9Kov+t40vh/Zm6bA+RJ2P5cAfZ1iybgcdGiqWDHmeoU+2Z+b3622WL9MO\nX/Ymi5T9PQR8rLX+2pYv4uXpyYJZ0wkNKXvLtpgzZwkI8KdC+XK4ubnRtnVLtu3cZcs4d+Tl6cmC\n2TPuLm+rVg7Lu13/QYeGCoAaFcqSnJZBSrrRYObp4Y6nhztpV69x42YmGdevE+BTmiBzaZJS0wFI\nTssg0NfHIdlLKtndKyKllD/wLVAK2Gy5rQ3wPnAdiAEGYPyj8zlQCfAFxgOngL7AdaXUea31Klvl\n9PDwwMOj4P97Y+PjCbauYw8OIibmrK2i3JVC88bFE2xdxx4cRMwZx+SNS06lrtWuWpDZh7jkFMyl\nvfH29GBglzZ0HjeHUp6ePNqkLuHlytC7fXOenbKI73ccIiXjKkuH9bZZPnkLggDoBRzWWrcBsjuc\nZgFPaq3bAxeBbkAwsFZr3RZ4BnhXa30IWALMtOUCVWQl7TiGU+XNzZKSfpVP12zhp/EDWTthMIei\nz3LszEUW/byNRxrX5d/vvMq7Pf6PD7/92YF5Sx5ZpIquLrDVcnkDUA6oCXynlNoAPIzRwZcINFVK\nbcGYqMrc8kwOkr+O/eKl2AJ3s5xFaGi+vLGOyxsSYCYuOSXn+qWkFEICzABEXYijcpkggsw+eHm4\n07hGFY6cPs/eqDO0iagBQMva1Th8+rzN8skxKQFgIvf
/KzeMCvWzVhXqTbXWU4AeGNNUG+Dvjola\nsEoVK5KSmsqZc+e4ceMG6zdtoVWL5o6OdVu35N242WF5W9Wpztp9xwA4evo8oYFmfEt5AxBWJoCo\ni3FkXLsOwJHT56kaGkyVkCAO/mHsnh46dY6qocEOyV5SyTGpotMYterfYkxNiQBKqbpa66NKqdeA\nX4GywB9a60yl1D8Ar9s9oS0cPnqMyTNmcvbceTw8PFiz7hfat21DpYoV6dS+HeNH/pNhI8cB0KVz\nR6pVrWLPeAXk/Y3J063y/mzJG1aRTu0fZvyotxk2YoyR95FOVKta1SE5G9WoTESVCvT4cAlubjDm\n2UdZve0AfqW96Xh/bfp2fJA+Hy3Fw93E/dUq0+S+KlQNCWLM0h/5796jAIzq9ojN8rniWxCkZr2I\nlFKBwGqMaWoz8ALwPDANY6o6Z7mtAvA9EAssAt7AaDp2A+K01nMKe50SWbP+2yEHJ7l74dXvAyBq\n2XQHJyka9w7PF1pxPtDkb/Mf6E+ykgvNoJSaATyIccDuDa31Lqtt0Ri/XLppuamn1rrQ34LIJFVE\nWuskjAkq2zuW/+bf/4gGGlhdX2bDWEIAjv9YjFKqLVBTa91CKVUH4x/oFvnu9qjWOuXWRxdMjkkJ\nIYpTB+D/AWitfwOCLG/b+dNkkhLChcy7w66YHZQHrE8cH2u5LdnqtnlKqXCMwyUjtdaF7qLKJCWE\nsKX8i+Y4YCjQDqgHPH2nJ5BJSghRnM5hTE7ZKgI5bwzTWn+RfVkp9R+gPvBNYU8ok5QQojitBboC\nKKUaA+e01lcs1wOUUmuUUtlvx2kLHL7TE8okJYQoNlrrrUqpPUqprRi/bByklOoDXNZar7ZMT9uV\nUunAPu4wRYEsUkKIYqa1HpHvpgNW22YCM4vyfLK7J4RwajJJOTVH/zbZhZmMf59NvoF3uKNwNJmk\nhBBOTRYpIYRTk0VKCOHUZJESQjg1WaSEEE5NFikhhFOTRUoI4dRkkRJCODV5M6cLM2rWh9Onx3O3\nqVmfa1Wz7rja8mwlpWb9g2X/4kDUaUwmGNXzSepXzz0//LKft/DD1r1GzXq1yozq+SRpV68y8tOV\nxCenUNrbi/f7P0tI4F86D9w9RSapu6SU6qOUmprvtmhL1foIpVT+U6Te7nmmWj5waVNGzfrUQmrW\npzH7w0msWLzAUrPumNrybCWlZn3nsShOXYxj5bjXmNjvGSKX/itnW0p6Bot+2sDS0QNZNmYwUWcv\nsv/EKb5av4PKoWVYOnoQLz/egdnfrSnkFUR+skgVA631JK31NkfnsGbUrM8gNCTklm0F16zvdkDK\nXCWmZv3o73RoXA+AGhXLkZyWRkp6BgCe7u54uruTlnGNGzdvknHtGgFmH05djKWBZdpqoqqz9/c/\nHJK9pJLdvaKpZjnVRGVgRvaNSqklGKecKAu0BkIABXyotf5MKdULeBs4A6RzF+fQ+auKVrMeTEzM\nGVtHKlSJqVlPukJEeKXcLH5mYi9fwVy6FN5engx6qjOdh7+Pt6cnXR68n2rlQ6hVqQK/HjxG56YN\n2HksinNxiQ7JXlLJIlU0tYDGgD/G6SduFnCf+kBLjFbjlUqpRcD7GF19ieQ9/7NzKGm1Zk6U17oS\nLiU9g/k/rOOnySPwLe3Ni5Pmcez0OZ5u2wwdc56eE+fQtHYNgv3NDkxc8sgiVTSbtdbXgXilVDJQ\nUKPmNq31TaXUGSAAo179itb6EoCldt2hCq5Zv3W30Fk4U816aJA/cZev5Fy/lJRMaIAfAFHnLlI5\nNJggP18AHqhVnSPRZ6hdpSLj+xin8k7NuMq6vTYfpF2KHJMqmvz/hBf0T/oNq8sm8taygxP8md9a\ns+642vK74VQ16/UUa3YdBOBI9BlCA/3xLV0KgLCywZw8dymnZv1wdAxVy5Xl1wO/MfPb/wLww9Y9\nPNSgtkOyl1QySRV
NC6WUOxAM+AIJd/GYeCDA0nycCrQCbH6Q/fDR35g8Y5altty9gJr1txk2ciwg\nNetF0ahmOBHhlXhuwmzcTCbGvvAPVm/ahbl0KTo1qU/fLu3oPWkuHm5u3F8znCaqOhnXrrN83Rae\nfW8WAb4+THu1p0Oyl1RSs36XLG8beATwBu4DpgATMWp55pB74Lye1votpZQZOKy1DldK9cWoWY/G\nOHD+X631ksJez6hZP2ibb6aYhdc1ippLVM16jVoAnFy90MFJisbtwcfvuTMhyiLlpGSRsi1ZpEoO\nhx8fEUKIwsgiJYRwarJICSGcmixSQginJouUEMKpySIlhHBqskgJIZyaLFJCCKcmi5QQwqnJZ/fE\nvemm8SHgrHOOPSOpuDOZpIQQTk0WKSGEU5NFSgjh1GSREkI4NVmkhBBOTRYpIYRTk0VKCOHUZJES\nQjg1eTOnCzt+IoqBQ4fTp8dz9OreLc+2rTt2Mn3OXNzd3HiodUsGDejnoJS5jp+IYuCQt+jT8zl6\ndX8mz7at23cyfc4nlrytGPSS4/JO+nEzB05fxGSCkY+1pn7lcjnblm87xA/7juPuZiIiLJSRj7dm\n9Z5jzP7fDioHBwDQomYlXnm4iaPilziySFkopcoD72qtX1ZKRWNVsKC1/rEIz3M/8Het9Tu32T4e\niNNaz/nLoQuRlp7OhClTadG04B+GiVOm8dnHsygXGkKv/q/wSIeHua96dVtGKlRaejoTJk+lRbOm\nBW6fOGUan32SnfdlI28N++fddfIsp+Ius2Lg00RdSmDMN+tZMdDo1EvJuMaijfv571s98XB3o/9n\n33Pg9AUA/tbgPv7ZpZXd87oC2d2z0Fpf0Fq/XAzPs/92C5Q9eXl6smDWjAJLP2POnCUgwJ8K5cvh\n5uZG29Yt2bZztwNS5vLy9GTB7BkFln7ekrdVK7bt3OWAlLA96iwdIqoBUCM0mOSMq6RkXAPA090N\nT3c30q5d58bNTDKu3yDA0skn/rx7apKy1FK1xaieigBGA88BdYGewMda61tGD0vX3qdAdcATGKe1\n/kUptQH4H9De8pyPW+4zWGvdVSk1DOiK8Y/Bf7TW79r0G7Ti4eGBh0fB//fGxscTHBSUcz04OJiY\nmDP2ilagQvPGxRMcFJhzPTg4iJgzZ+0VLY+4K2nUDctd+IN8SxN3JQ1zKS+8PT0Y2KEpnT9cSilP\nDx5tcB/hIYHsO32B3SfP8dKiH7iemcnwLi2pW9F5G6OdzT21SFnUBNoA/YGRQCOgj+Xy7fQAzmut\n+ymlygK/AA0s25K11h2UUpOAfwD78z22NUaD8Uml1Ixi+y6KU0mrNXOmvFZZUjKu8emGPfw0rAe+\n3l70Xfgvjp2Po2HlcgT7lqJt7XD2n7rAyK/W8a83uzswdMlyL+7u7dZaZwHngYNa65vARSCgkMe0\nBJ6yTE7fAKWVUl6WbZss/z1TwHOkAb8C6zEmreBi+Q7+otCQssTFxedcv3gptsDdQmcRGpovb2xs\ngbuF9hDi70PclbSc65eS0wjx9wEg6lIilYP9CfItjZeHO43DK3DkbCzVQ4NoWzscgPurlichNZ2b\nmZmOiF8i3YuL1I3bXD5VyGOuAZFa63aWr5pa62sFPEdOcaNSqiowFPib1rrdHZ7fripVrEhKaipn\nzp3jxo0brN+0mVYtmjs61m3dknej4/K2qlmZtYejADh6NpZQfx98vY1/r8KC/Ii6lEjGdeOvxJGz\nsVQtE8Bnv+7j3/t/B+D3C/EE+5bG3e1e/NH7c+7F3b0/YwfwJLBCKRUKvKm1HnWHx5QFLmmtU5RS\njYGqgNcdHlNsDh/9jckzZnH23Hk8PNxZs+4X2rdtQ6WKFenUvh3jR77NsJFjAejSuSPVqlaxV7Tb\n550+05LXgzU/W/KGVaRT+4cZP+ptho0YY+R9pBPVqlZ1SM5GVSsQERZCj7nf4jEpG
38AACAASURB\nVGYyMebJh1i95xh+pbzoGFGdvg81os+Cf+HhZuL+quVpUq0ilYL9GfHVz6zaeYSbmZlMePphh2Qv\nqWSRujtfAe2VUlsBd2D8XTxmP5CilNoCbAbmA59YLttcvbp1+HLB3Ntub/pAI1Z9/pk9otyVenXr\n8OXCebfd3vSBxqz6YpEdE93e0L+1yHO9doXcXc9nm0fwbPOIPNvLB5hZMuApu2RzRaYsZzoIKXKE\nV62SFX30oKNj3JXwusbvEKJ/O+TgJHcvPNx4G0HUPIe/W6RI3P/xhunO93ItsmMshHBqskgJIZya\nLFJCCKcmi5QQwqnJIiWEcGqySAkhnJosUkIIpyaLlBDCqck7zsW9KfsDvpeTHJtD3JFMUkIIpyaL\nlBDCqckiJYRwarJICSGcmixSQginJouUEMKpySIlhHBqskgJIZyaLFIu7PiJKDo+8Q+Wrvz6lm1b\nd+yk6/Mv8mzvfny8wDlOI3z8RBQdH/87S1d+dcu2rdt30rVXH559oS8ff+rYvJN+3sNzX6yhx5dr\nOXQ+Ps+25XuO89wXa+i19H988PMeAOJTM3jpq/X0Wf4zPb9cy4FzcY6IXWLJIuWi7qZmffaHk1ix\neAFbtu3gxMmTdk6Y193UrM+eOpkVSxayZft2TkQ5Ju+u0xc5lXiFFS88woRHm/P+/3Kbn1OuXmfR\njt/4slcnlvbqRFTcZQ6cjeOHI3/wREQ1lvToyJttGzJ7Y8k4LbSzkEXKRUnNum1sj75Ih1qVAKhR\nNoDkjOukXL0OWNes3+BGZiYZN24SUNqLPs3q8FhEOADnk9Mo5+fjkOwl1T312T2l1A6gh9Y6SilV\nCfgXcBCjGt0boz59rVIqGqhnqaOaChy2PEVrIARQwIda68+UUs8D/wRigDiMduMvuX0t+2EArfVg\nW36vUrNuG3Gp6dQtn9vxGuTjTVxqOmZvT7w93BnYuh6d531PKQ93Hq1TlfBgfwBiU9IZ9M2vpF67\nweLn2jske0l1r01SXwLPWi4/gbFIZWit22JUpM+5w+PrW+73FPCaUsoN+ADoCHTDqG+H3Fr2hy33\n/cjqOQ7beoEqspLWGOSkeVOuXufTbUf56aXHWPvqExw6H8+xi4kAhJhL81Wfv/HPDo0Y9e/tDk5a\nstxri9QKjEUG4DGgMrABQGt9DriqlCqsCn2bpZY9u1K9LJCstb6otU4F1lnuV1gt+87i+3b+HKlZ\n//NCzKWJS03PuX7pShohvqUBiIq/TOUAX4J8SuHl7k7jSiEcuZDArtMXuZxhFF63rRHGUcvCJe7O\nPbVIaa3jgTNKqaYY33saVtXoGA3DmYD1P9WeVpfzV6qbLPfPlv24wmrZr+FgUrP+57WqVoG1OgaA\noxcSCPXzwdfb+CsSFuBLVHxybs36hQSqBvvxPx3Dvw4ZB/qPX0qivByTKpJ76piUxZfAxxjHjDKA\nh4GVSqnKQKbWOkkplQxUUEqdBB4E9t3mueKBMkqpIMtztQO28Odq2YuV1KzbRqNKIUSUD6bHl2uN\nmvVOTVh98CR+3p50VJXp27wOfVasw8PNjfvDytKkcig1yvgz8t/b+Z+O4drNTN55pODfYIqC3XMN\nxpbdrgsYB7VTgHlADYwpaqTWeqNSagAwDNAYC9FGy8Praa3fUkqZMY4thSulBgIDgd8xFqr/YOxW\nzgPqYqll11r/ZNn9G6y1zj4Qf1vSYGxb4VWMRS7qwyEOTlI07i++c881GN+Lk1Qr4AetdfYpGfvn\nv4PWegGw4HZPoLVOAcItVy8BD2mtE5RSa4AorfWN2zxvu78WXYh7zz21SCml3gUeAZ4uxqf1AX5R\nSqUC+7XWW4vxuYW4591Ti5TW+h3gnWJ+zi+AL4rzOYUQue6p3+4JIUoeWaSEEE5NFikhhFOTRUoI\n4dRkkRJCODVZpIQQTu2eeguCEDm8SgFgatjCw
UHEncgkJYRwarJICSGcmixSQginJouUEMKpySIl\nhHBqskgJIZyaLFJCCKcmi5QQwqnJIuXCpGbdNj744lu6j5vGc+OmcyjqVJ5t63YfpNvoD+k5fgbL\n1vyac/vxmHN0fuPdPLeJuyOLlA0ppaIt50O3vu0xpdQSW7+21Kzbxs6jv3PqQiwr3xvGxJd7EPn5\nNznbMjMzmbj4a+a//QpfjnuD9XsPcyE+kbSMq0Qu+YYH69VySOaSThYpFyU167ax/chxOjQxiidq\nhJUnOTWNlDSjhy/xSip+vqUJ9vfDzc2NByMU2w5rvDw9mP/2K4QGBTgkc0knn90rJkopT3Kr1b2B\ncVbb6mOcYjgBiLJHHqlZt424pGQiqlXOzeJnJvbyFcw+pQn2N5OafpXo85cICynDzqPHaVq3Jh7u\n7ni4uzskryuQRar4PIelsl0pVRFLM7LFWIxaq38ppeY6JF1hSlqtmRPltU5iMpn44NVejJm/DLNP\nacJCyjhV1pJKdveKTxPyVbYD2ZXtdYHsFpkN9g6Wn9Ss/4UsQQHEJSXnXL+UeJnQQP+c683q1mTp\n+CHM++cr+PmUpmJIGUfEdCmySBWfLAqubIe8dewO/zOXmvU/r1WD2qzZsR+AI3/EEBoUgG/pUjnb\nX5r0CfGXr5CWcZX1ew/Tsp5ySE5XIrt7xWcX+SrbgewCUo0xaa2x3MfmpGbdNhrVqk5E9co8N246\nbm4mxr7YjdW/bsfsU5pOTRvStX1L+n/wMSZMvPRkJ4L8zRw5eZrJS1dzNjbB+P9ix35mDe1PoNnX\nId9DSXPP1azbilLKg3yV7RgHy+sBtYDFwFngJGDWWvcp7PmkZt22wu8zJpyTP5asykS3xp2lZl38\nObepVg+3/Hcv0NCugYRwEQ4/PiKEEIWRRUoI4dRkkRJCODVZpIQQTk0WKSGEU5NFSgjh1GSREkI4\nNVmkhBBOTd7M6cxMJejNxVlZkJHq6BR3z8MTAFPl2g4OIu5EJikhhFOTRUoI4dRkkRJCODVZpIQQ\nTk0WKSGEU5NFSgjh1GSREkI4NVmkhBBOTd7M6cKOn4hi4JC36NPzOXp1fybPtq3bdzJ9zie4u7nx\nUOtWDHqpn0Myvv/Rxxw4chQTJkYNGUyD/9/efYdHUa9tHP9uGpBCGgm9Cz+lKQiRKiLFcixHBQXh\nIGI5gngUy6sUEQVFFBsCApaDggIWbMdDUQQbBJQO6sMhFOkkISSk0ZL3j5kkCyQYkGRnkudzXVzs\n7uzM3hv04TfDJneTgg9XfvP9j7wxYxZBgYH8rduV9Ot1Ex998RVfLPg6/zkbfxfWfDu/VDOPm/gG\nazf9hsfjYcSDg2l+UUHZwuIflvHGu+8TFBjItd2uoN8tf+fj/8zn8wXf5D9nk2xm9ddflmpmN9Mh\ndZ7Y1ekfA8eB+iLi03694tSWvz1lIlVjY+h39z+5qmsXLmjYoFQzrly9lh07dzH3zckkbN/B8Gdf\nYO6bkwGrsnzMSxP5dMZ0IsIrc8/Dj9Otc0d63fA3et3wt/z95y9eWrqZ16xj+67dzJ020co87iXm\nTptYkPmVScx7e4qV+dHhdOvUgZ7XXUPP667J33/Bt9+Vama309O980xEFvh6QIE7asuX/7Kabp07\nAtCwXl1S0w6TnmF9a03KoVQqh4YSFRlhVZa3bsWyn1edtP/kd95j8MD+pZo5ftUaunVqn5857XB6\nQebUVMJCQ/Izt7u0Jct+WX3S/lNmzGLQgH6lmtntyv1KyhgzAOgMVAGaAiOw2oibAH2BySLS2n7u\nL0BPrPaXsUAWsN9+nvfxmgGTgJlYtertgTeAFsBl9jEnl+T7ckNtedLBgzS9sHFBjsgIEpMPEhpi\n/Y+ekZnJ9p27qFm9GitWrSWu1SX5z13/6+9UrxpLTHRUYYcuMYnJKTQ1XpkjwklMTrEyR0SQkZlV\nkHn1OuJat
sh/7obfhGqxMaWe2e3K/ZCyNQI6YbW9DANaAgPs24UZAjwiIj8YY24GiqqpvQT4O1aT\n8SagPlAR+AQo0SF1VhxSa+Zdr+bxeHj+yScY/uwLhIWEUKtGtZNyfvzFV9z0t6t9EfMkp2Ue8Rgj\nxr1EaGgItapXO+lL+9GX87npmqt8kNLd9HTP8ouI5AJ7gfUicgJrhRRexPM/AqYaY4YDa0RkXxHP\nSxCRZPu4B0Rk958ct1Q4pbY8tkoVkpIP5t8/kJRMTHTBvI9rdQkfTJ3ItJfGERYSSs3q1fK3rViz\njpbNm5ZqXoDYKtEknpq5SsHKKK7lxbw/5RWmvTCW0NAQalavmr9t5Zp1tGzepFTzlgU6pCzHi7h9\nqkAAEZmJ1UScBHxpjCnq530UdVyf/gwWp9SWd4hrzcIl3wPWv3jFVokmNCQ4f/vdQx8n+WAKmVlZ\nLPlpGe3aXArA/sQkQipVJCgw0AeZL2XR0h/szP+zMgcXZL7nkeEkp1iZl/4UT/vWrazMSUkEV6rk\nk8xup6d7Z5YGXGCM8QBVsdqJMcY8CUwSkenGmFis61eO4oba8lYtmtH0wsb0vmcIHj8PTz36IPO+\nWkBYSAjdr+jErTf+jYEPPWZVlve/nagIawGamJxMVGRkqecFaNW8KU1NI3rf9yB+Hg+jHn6Aef9d\naGXu3JFeN1zDXUOfwOPxcO8/ehOZlznpINFe1wFV8ZX7mvW8C90i8qgx5jqgp4gMyLsN5ALNgXVY\nF8z/gXWh/V9Aiv3rDmAK1kcQqlBw4fxjEWltjAkFNopIPe/bZ8pVr26dXLfUlte7qDnk5rJ97Qpf\nRym2epdYK8dtq37ycZKz44mp46KfhHh+lPsh5VQ6pEqWDin30GtSSilH0yGllHI0HVJKKUfTIaWU\ncjQdUkopR9MhpZRyNB1SSilH0yGllHI0/bYYdX54PFAxxNcpii/jMAA586b6OMjZ8f/nc76OUOp0\nJaWUcjQdUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhVYZt3pJAt+tvYtac\nD0/btix+JT37DeC2/gOZPP1tH6Q7nVvyPr90HX1mL+H22UvYsO/gSds+WJtAn9lL6DdnKeOWrAPg\neE4Owxb8TL85S+n9wRJW7U7yRWzX0iFVRhWnZv31CeOZPeMtfoqPZ0vC1lJOeDK35P15ZyI7UtKZ\n3acLY3pcynP2IAJIP3KMd37ZzMzbOjOr9xUkHExj3Z5kvvj1DyoFBjCr9xWM6XEpLyxd75PsbqVD\nqoxyQ826N7fkjf/jAF0vqAFAw+jKpGUfJf3IMQAC/f0I9PMj8+hxjufkkH3sBOGVgrj+ojo83tlq\nMo4KDuJQ9lGfZHcr/d69UxhjVgC3i0iCMaYW8DmwHmgAVABGicgiY8x2rJaZdGPMBGCjfYiOQAxg\ngBdF5G1jzD+A/wN2YnX1fSsiM0ryfbihZt2bW/ImZWbTpGpBnVZkcAWSMrMJrRBIhQB/Bre7iB5v\nL6BigD/XXFibepFhJ+3/3uot/O3C2qUd29V0JXW6mcBt9u0bsIZUtoh0Bm7Gqqo6k+b28/4OPGCM\n8QPGAd2AXlh17s7itsYgJ+X1ipJ+5BjTV/7O/IFXsejua9iw9yC/Jx7K3/7B2gR+23+IQW0v8kFQ\n99IhdbrZWEMG4DqgNrAUQET2AEeMMVGF7wrAcrumfRdWnXoVIE1E9otIBrC4pIIXl1Nq1ovLSXlj\nQiqRlJmdf/9ARhYxIRUBSDh4mNrhIURWqkCQvx+talZh035rSH2yYRtLE/by+o3tCPTX/+3Ohn61\nTiEiycAuY0wbrK9PJifXogcBOZz0dyje3dmn1ql77Ofn8fkywCk168XlpLwd6lVl0WbrVPPX/SnE\nhlQiJMj6469ZOZiE5MNkHzsBwKb9KdSNCGXnoXTmrt/Gaze0pUKAv09yu5l
ekyrcTGAyMB3IBroA\nc4wxtYEcETlkjEkDqhtjtgJtgTVFHCsZiDbGRNrHugIo8UZKN9SsuzFvyxrRNK0awe2zl+Dn8TCy\n6yV8umk7YUGBdGtUk4FtGjPgo+8J8PNwSY1oWteqwis/buRQ9hHu+7Tgj/3NWzoRpCuqYtEG40IY\nY4KAfVgXy9OBqUBDrFXUMBH53hhzD/AIIFiD6Ht797zKdu9q9cHAYOB/WIPqvyIy80wZXNdgDLgl\nL0C92taQS3husI+TnB3/fz5X7hqMdSVVuA7AlyKSd9Xz7lOfICJvAm8WdQARSQfq2XcPAJeLyEFj\nzEIg4fzGVars0iF1CmPM08BVwC3n8bDBwLfGmAxgrYgsO4/HVqpM0yF1ChF5CnjqPB/zPeC983lM\npcoLvXKnlHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH048gqPIpx/r+OlIOnvl5yud0JaWU\ncjQdUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SZdjmLQl0u/4m\nZs358LRty+JX0rPfAG7rP5DJ09/2QbrTuS0vgKdrL/z+8X/49XsMqp1cDuFp1dna1vdRPF17+Sih\n++mQOkfGmO122YL3Y9cZY2b4KNJJMrOyGDN+Au3i2hS6fewLL/H6hPHMnvEWP8XHsyVhayknPJnb\n8gJQuxGeyBhyZr5AzvyZ+HW/rWBbUEU8l/UgZ9YEct6fgKdKdahR33dZXUyHVBkVFBjIm6+/UmiJ\n5s5duwkPr0z1alXx8/Ojc4cOLF/5sw9SFnBbXgBPvQvJ3bzOupO8DyoGQ5BVFMqJ49avoArg8YOA\nIMjK8FlWN9NvMC4GY0wgVgdfA6ACMMprW3Osn19+EK8WGGPM/cDtWMWgn4nIS8aY0fYx6gNX2E3H\nJSIgIICAgML/eBOTkomKjMi/HxUVyc5du0sqSrG4LS8AIZXJ3fdHwf3MwxBSGY5mw4nj5P74FX73\njYXjx8j97WdIOeC7rC6mK6ni6QNki0hnrAr2SV7bngRGi0hX4ASAMaY+0BPoCFwO3GKMqWM/P0hE\nOpXkgDprbutedGxer0q8oIp42l1NzvRR5LwxAk/1+hBb03fRXEyHVPG0BpYCiMge4AgQZW9rAuRV\nVC21f48DGgFL7F9hFHTwrSzpsH8mNrYKSUnJ+ff3JyYWeprlFI7Nm56KJ6Rywf2wcMhItW5HV4PU\nJOsUL+cEubu24Knm25Zot9IhVTy5nPTXJEFYp3HYj+fdzvt6HgW+EpEr7F/NReR7r20+VatGDdIz\nMti1Zw/Hjx9nyfc/0qHdZb6OVSSn5s3d9ise08q6U7U2HE6Fo0es+6nJ1qAKCATAU60uuQf1dO9c\n6DWp4vkZ6ALMMcbUxhpKee3GgrXSWmg/B2AVMN4YEwxkAa8CT5Rm4I2//sb4l19j9569BAQEsPCb\nb7mycydq1axB9yu7MHr44zzyxEgArr2qO/Xr+vZvebflBWD3VnL37bA+fpCbS87Xs/E0b0fukSzY\nvJbcFV/j12co5OSQu3sr7Nri68Su5Ml17Pm9cxhjAoCpQEOsVdQwrIvlzYDGwL+B3cBWIFREBhhj\nBgMDsa5TfSYi4+wL50kiMun0VzlZvbp1crf/tqEk3s55V++i5gC4JS9AvZq1AEh4vK+Pk5wd/yem\nev78WWWLrqSKQUSOA3ef8nA9+/fVwMWF7DMFmHLKY6NLIJ5SZZpek1JKOZoOKaWUo+mQUko5mg4p\npZSj6ZBSSjmaDimllKPpkFJKOZoOKaWUo+mHOVX5FBIGgN/NA30cRP0ZXUkppRxNh5RSytF0SCml\nHE2HlFLK0XRIKaUcTYeUUsrRdEgppRxNh5RSytH0w5xl2OYtCQwe+igD+vahX+9bT9q2LH4lL0+a\ngr+fH5d37MD9997lo5QF3JJ33JuzWCs
JeDww4p5/0Lxxg/xti+NX8cbczwkKDOTay9vS77ruZGUf\nYdir00k6lMrRY8cYdNvf6RLX0mf53UaH1HlgjLkaq/BzPvCxiLQ+ZXuSiJRqB1NxasvfnjKRqrEx\n9Lv7n1zVtQsXNGxQ6HNLg1vyrtzwG9v37GfuhKdI2Lmb4a+9xdwJTwGQk5PDmGnvMe/VMUSEhXLP\n6Al0a3spq3/bTLNG9bn7luvYfSCJgU+O1yF1FvR07zwQkQUi8oavc3hzW225W/LGr/uVbm0vBaBh\n7ZqkpWeQnpkFQEraYcJCgokKr4yfnx/tLm7CsrUbubZTW+6+5ToA9iUmU61KpE+yu5WupM6B3UY8\nC6sJJgD4BqsAdJLXc64BHgCut+8/A/QAkoHrRSSHEuS22nK35E08dIimF9QryBIeRmLKIUKDKxEV\nXpmMrGy279lHzdgqrFj/G3HNL8p/bu/HnmZ/UgpTRz3sg+TupSupc9MT+FpEugAPYjUa5zPGXIBV\nv97HrlOPwjoNbGvfblHKec/MbbVmDsrrHcXj8fD8Q/cy4rU3GfLsa9SqGoN3ZdycF59iypNDeezl\nqWiVXPHpSurcLAI+NcZEAB8D+4C885QQ4DOgv4jYndukich6+/ZuILw0w57KsbXlRXBS3tioSBJT\nUvPvHziYQozXKi+u+UW8P/5JAF56dy41q1Zh45ZtRIdXpnpMNBc1qMuJEyc4mJpGdIRP/zNwDV1J\nnQMR2YjVtfcDMA6o47W5lv34YK/Hjp9yCJ8WPDq1trwoTsrboWUzFi1bCcCmLduJjYokNLhS/vZ7\nnnqR5EOpZGZns3TlGtpf3IxfNv7Ovz+bD0BSSiqZWUeIrBzmk/xupCupc2CM6Q1sFZHPjDFJwH+B\n6fZmwRpQ3xpjeojIIl9kdFttuVvytrqoMU0b1qf3Y0/j5/Fj1KD+zPvme8JCgunerjW9rrqCu0a9\ngMfj4d5e1xMZHkbva7oyYuJb9H18DNlHj/HkoP74+en6oLi0Zv0cGGNaYdWup2NdPJ+HVcE+Cfsj\nCMaYhsCXwGXAtryPIBhjPgYmicjSM72G1qyXrHqNrQva2779zMdJzo6ncZzWrKs/JyKrgbgiNre2\nn5MANLEfy7+AIiI9SzadUmWLrjmVUo6mQ0op5Wg6pJRSjqZDSinlaDqklFKOpkNKKeVoOqSUUo6m\nQ0op5Wj6YU5VPh2xfgZUTrxPvmvpnPk3LuozxGWXrqSUUo6mQ0op5Wg6pJRSjqZDSinlaDqklFKO\npkNKKeVoOqSUUo6mQ0op5Wj6Yc4yzC215Xnckvf5r39m3e4kPHgY1qM1zWsUNNd88Ivw5cat+Hs8\nNK0ezbAebUjOyGLYF8s4euIEx07k8H/dLuXimjE+y+82upICjDFn/SN9jTF1jDFn/fFfY0wLY0zj\ns93vbBWntvz1CeOZPeMtfoqPZ0vC1pKOdEZuyfvzjv3sOHiY2QOuYcx17XhuUUGTcvqRo7wTv4mZ\n/a9i1h1Xk5CUyrrdiXy5cRs3NG/AjH49eOiKlrz+3TqfZHcrHVKWJ85hnysp+uecn8nNQIkPKbfU\nludxS9747Xvp2rg2AA2rhJOWfZT0I0cBCPT3J9Dfj8yjxzmek0P2seOEV6zAgMuacF2z+gDsTcug\naliwT7K7VZk63TPGDACuBipj9d+9AgzHqpw6ALwLvAMEATnAXVhtxBcbY+aJyM3GmGeBToA/VqvL\nbGNMD2AskAXsB+4HRgPHjDF/AJnAGOAokALcCrQHhtivcxFWieg84D4g0RhzQERWltTXwi215Xnc\nkjcpPYsm1aLz70cGVyQpPZvQCkFUCPBncKcW9Jj8KRUD/LmmaT3qRVcGIDE9i/s/XELG0WP8u293\nn2R3q7K4kmoK3IC10hkLVADmi8izwDPA2yJyBTAFGC0iLwKp9oDqBNQVkcvt/UcaYyphDZtHRKQz\nMAd
rgM0AXhORL4BI4HZ7expwlZ0lDhgAtAMeEJENwAJgWEkOqLPmtlozJ+X1ypJ+5CjTf9rI/EE3\nsmjITWzYncTv+w8CEBNaiQ8HXsv/dWvN8C+X+SqtK5XFIfWdiBwXkSSsVU0VIG8gtAaW2reXAC1P\n2bc90NYYsxRYiPX1qQ58BEw1xgwH1ojIvlP2SwTeMsZ8B3QB8v6qXS0imSKSfr7e3PngpNry4nBS\n3piwYJIysvLvH0jPIibUajBOSEqldkQokcEVCfL3p1WdWDbtPcjPO/aTmnUEgM4X1OTXfQd9kt2t\nyuKQ8n5PHiAX6zQM+3ZeuWLeKZ+3o9grLfvXRSKyVURmYg2fJOBLY8yFp+z3DjDEXkl97vX4qfXq\njuCk2vLicFLeDg2qs+j3HQD8ujeZ2NBKhFQIBKBmeCgJyWlkH7P+2DftTaZuVBhfyx98vsG60L/5\nQArVKus1qbNRpq5J2doZY/yxTsHCgGSvbT9jDZvZQGfgF/vxvMG2AphgjBmPNcReFJEHjDFPYl2f\nmm6MicUq/cyh4OsXDvxhjImwj7/+DPm89ysxbqktd1velrViaVotmttnLMDPAyOvjuPTdQmEVQik\n24V1GNi2CQNmfU2An4dLasXQuk5VGlYJZ9gXy/j69z84euIET13j3L8QnKhM1azbF85vxFoxXQC8\niHVBu5mIpBtjagBvY12nOgrcJSK7jTGLgTARibMvnHfDWnFNEZEZxpg7gH9hnT6mAHcAHbAuxD8G\nGKzrYJuBr7Auqg8HbslrLDbGJIlIFWPMncDTwJ0isrio96I16yWrXt16ACS89Ihvg5wl//4jy13N\nelkcUs1E5FFfZ/mrdEiVLB1S7lEWr0kppcqQMnVNSkRm+DqDUur80pWUUsrRdEgppRxNh5RSytF0\nSCmlHE2HlFLK0XRIKaUcrUx9BEGpYgusAICnWfmrLXcbXUkppRxNh5RSytF0SCmlHE2HlFLK0XRI\nKaUcTYeUUsrRdEgppRxNh5RSytF0SJVhm7ck0O36m5g158PTti2LX0nPfgO4rf9AJk9/2wfpTueW\nvOPe+4Teo16iz6iX2ZCw46Rti39ZT68RL9J39Cu8v/C7/Mc379xDjwefPukxVTzlbkgZY27xdYbS\n4Jba8jxuybvy1/+xY18ic555hLH/vJ1n3/04f1tOTg5j//0R0x6/j5mj2JM/sAAAHqJJREFUHmTJ\n6o3sS04hM/sIz874mLbNSry4ukwqV0PKGFMP6OPrHKXBLbXledySN37TZrq2bgFAw5rVSMvIJD3T\n6uFLOZxBWEgloiqH4efnR9umhuUbhaDAAKY9fh+xkeE+yex25e179yYDccaYp4DmWLVXAVjtwuvt\nBuPngGPATuAeCqlLF5GnjTHdOL1aPReYBdQFlgG3ikgtY0wTYJK9/TAwQEQOleQbdUtteR635E06\nlEbT+rULsoSFkph6mNDgSkRVDiUj6wjb9x6gZkw0K3/dTJsmjQjw9yfA398necuCcrWSwqq4+g5r\n4CwQka7AIOAle/tE4EYRuRLYD/SyHz+pLt1+rLBq9auBiiLSFvgWqGE/93Xgn/brLQLuL6k3eE7c\n1hjkoLzeSTweD+MG9WPktPd54OU3qRkT7aisblXeVlJ52gMxxph+9v1gY0xVoBEwzxgDEILVWLwb\nuy4dwN4GBdXqAUADrKEUC/xkb/8vBQ3GccCb9r4VsEpKfcZJteXF4aS8sZHhJB1Ky79/ICWV2IjK\n+ffjmjRi1uihALw8+wtqxESXesayprytpPIcxTrFy6tTj7Mf2+31WBsRecF+fmF16YVVq3soqG7P\npeAv2kygi33cdiLyrxJ5V8XkpNry4nBS3g4tLmThirUAbNq2k9jIcEIqVczffu/zU0hOPUxm9hGW\nrN5I+2amqEOpYipvK6m8ivMVwN+B5fb1oqtF5GVjDMaYJiLyqzHmA
axTw6IUVq2eAPS0t/eg4Ou7\nDutUcL4xpjeQeKb24vPBLbXlbsvbsnEDmjaoTZ9RL+Pn5+HJO3vx6XfxhAZXonubi+l5ZXvuHjcZ\nDx7uvbE7kZVD2bT1D8bP+pTdiQcJCPBn4Yq1THz4biJCQ3zyHtymTDUY/xljTAywCvgEqIN1euYP\n/EtEfjHGdMS6PnUU2AP0x7oONaSQuvRnOL1avT1WjXtlYClwr4jEGmMuAqZjDcksrGtZB8+UVRuM\nS1a9C6wVztb/vOfjJGfHr1WPctdgXK6GVEkzxkRhndZ9YoypCSwWkQvP5Vg6pEqWDin3KG+neyXt\nMHCrMeYxrOt9Q32cRynX0yF1HonIMeA2X+dQqiwpr/+6p5RyCR1SSilH0yGllHI0HVJKKUfTIaWU\ncjQdUkopR9OPIKjyKe8jkQGBPo2h/pyupJRSjqZDSinlaDqklFKOpkNKKeVoOqSUUo6mQ0op5Wg6\npJRSjqZDSinlaDqkyjC31JbncUvecTM+pPfw8fQZ8QIbtmw/advin9fS64lx9B35Iu/PX5L/+OY/\ndtNjyMiTHlPFo0PqDIwxlxtjYu3b240xocXcb7sxJtQY84Qxpl3JpiycW2rL87gl78pNm9mxN5E5\nzz3O2EH/4Nl35uZvy8nJYezbc5k2bAgzn3mEJas2FNSsvzOXts3O6SdJl3s6pM5sIFZZwzkRkedF\nZPl5zFNsbqktz+OWvPEbfqdr3MUANKxV/ZSa9XTCgisRFW7XrDc3LF//m1WzPmwIsVFas34uyuX3\n7hljAoF3serQs7GG0WSsQtBgrJbicKzaq6bGmFvsXYcYY67F+rpdZe87HasctAIwSkQWeb3ODOBj\nYOEpr9dfREq0J9wtteV53JI36VAaTRvWKchSOYzEQ2l2zXoYGdnZbN+7n5oxVVi5cTNtmjbWmvW/\nqLyupO4A9olIB+BNrGH0loh0AYYBj4vI18Ba4E4R+cPeb6OIXA7sALoCfYBsuyD0ZmBSMV/vhhJ6\nX+fGbY1BDsrr3bbk8XgYd/8ARk6ZyQMvTqVmrNasnw/lciUFtAIWA4jIHGNMODDJGPMo1oooo4j9\nfrR/34210roUq18PEdljjDli11qd8fXO15s4V06qLS8OJ+WNjSqkZj2y4DQurmljZo15FICX3/+U\nGrFas/5XldeV1AlOfu8PYVWsdwQGnWE/77p1D1aNuncPWhAFNetnej2fclJteXE4KW+Hi5uwcPlq\nADZt/eP0mvVnXyc5Nc2qWV+1gfbNL/JJzrKkvK6kfgauBD4yxlwHjAQG29tuwho2UFDLfqbjdAHm\nGGNqAzkicsgY82ev10JEnjsv76QIbqktd1velqYhTRvUpc+IF/DzeHjy7j58umSZVbN+WUt6duvI\n3WMm4vF4uPemq6ya9YQdjH/vY3YnJhPg78/C+NVMfPQ+IsK0Zr04ymWDsTEmCHgL60L2MWAMMBXY\niXVd6VX7sTrAP4AbsarUm4lIujFmArARmGXv1xBrsA0Tke+NMduBZvaxPgYWnfJ6d/zZhXNtMC5Z\n9RrZDcbzZ/s4ydnxa9Gl3DUYl8sh5QY6pEqWDin3cMx1EqWUKowOKaWUo+mQUko5mg4ppZSj6ZBS\nSjmaDimllKPpkFJKOZoOKaWUo5XXb4tR5V1WJgC533zq4yBnqUUXXycodbqSUko5mg4ppZSj6ZBS\nSjmaDimllKPpkFJKOZoOKaWUo+mQUko5mg4ppZSj6ZAqw9xSW57HbXkBPJ1vwq/3UPx6D4WqdU7e\ndnEna9ttD+K54mYfJXQ/1w8pY0w1Y8y0Qh6fYIwZcMpjofbPHz+r2vTzyRhznV0aWqLcUluex215\nAah1AZ7IGHLmvELOog/w63JLwbaginhaX0nO3NfImfsanqhqUL2ez6K6meuHlIjsE5F/+jqH07il\ntjyP2/ICeOo0JneL/XPdD+6Hi
sEQZNdbnTgOOScgqAJ4/CAwELKKqnNUZ+L4792zV0OdgSpAU2AE\nVnNwE6AvsB/4WERaG2P6AY8Du4AsYKMxpjLwCVCRgnJP7+PXAN7Gans5Adzt1ViMMSYAqyK9FlYN\n+2gR+Y8xphtWq8w+QIBEERltjHkW6AT4A5NEZLYxpjnwHnAQSDiPX54iuaW2PI/b8gIQHEbu/p0F\n9zPTITgMjmbDiePkLl+A312j4Pgxcn9fDYcSfZfVxdyykmqEVU0+DqsG/Sb7dp+8JxhjPMBzWPXn\nNwAX2Jv6YdWjd8KqTT/VGOAlEemKNXSePGV7FLDIrlK/FXjafnw8Vt3VVUBLO0MnoK5dxX4lMNIY\nU8k+5mj7NU6c49eg5LitMcipeU+qia2IJ647Oe+MJeetp/FUrwtVavgsmpu5ZUj9IiK5wF5gvYic\nwFpBhXs9Jxo4LCIHROQY8JP9eBNgmX17aSHHbg+MNsYsxRqAp/ZipwBtjDE/Ya2o8rbXFZE1dpb/\neh2rrX2shVhf3+rFyFCqnFRbXhyOzZuRhic4rOB+SDhk2BXsUVUhNRmyMyDnBLm7t+I55cK6Kh7H\nn+7Zjhdx23PKbe+Kc79CHi9sKB8FeonI3iJe+3as1VQn+/dfCnlO3l/tR4G3RWSc90Z7lXemDKXK\nu7a8WmwsS77/kQnPPePrWEVyat7c7b/j1/4acjcsg9hakJEKx45YG9MOQnRVCAiE48fwVK1NzrZN\nvg3sUm4ZUsWRDIQbYyKADKADsBzrelFrrOtShf0wnhXA34E3jDFXAtVE5AOv7VWAbSKSY4y5mYIK\n9n3GmAuB/wE9gCX2sSYYY8bbz3tRRB7wyrCwiAznnVtqy92aF4C928jdv9P6+EFuDjmLP8LTJI7c\no9mwZT25Py/Gr9cD1kpqzzbY7YB/kXShMjOk7CEyGvgO2I5Vgw7WBetPjTGLsS6cn3pBYzTwb2NM\nH3vbgFO2fwJ8YYxpC7wD7DLGjAJGAvOAbcBvwAkRWWaMWYI1HD3AFPsYY+3XeBDYSsGgKzHNmlzE\nzLemFrm9zaWtmPveOyUdo9jcljdP7o9fnvQfVG7SnoLbG5ZZqyz1l2jN+jkyxvQANovIdvtzWt+d\nsgL7S7RmvWTVq1UbgISRA3wb5Cz5Pzyx3NWsl5mVlA94sFZoh7E/BuHjPEqVSTqkzpGILMS6xqSU\nKkE+/5cmpZQ6Ex1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNP2clCqfKgYD4Olyg4+D\nqD+jKymllKPpkFJKOZoOKaWUo+mQUko5mg4ppZSj6ZBSSjmaDimllKPpkFJKOZoOqTJs85YEul1/\nE7PmfHjatmXxK+nZbwC39R/I5Olv+yDd6dySd9y7H9P7yQn0eXICGxJ2nLRt8S/r6DV8PH2feon3\nFyzNf3zzzj30+NdTJz2miqfcDCljzABjzISz3CfUGLO9kMcn2Me7whhz2o8NNsa8aoypX8QxK9s/\nH71EZWZlMWb8BNrFtSl0+9gXXuL1CeOZPeMtfoqPZ0uCb5tM3JJ35a//Y8e+ROaMeZSx9/Xj2Rkf\n5W/Lyclh7DsfMu2Jwcx8aihLVm9kX3IKmdlHePbfH9K2mfFJZrcrN0OqNInIQyKyrYjNrbAqsEpU\nUGAgb77+SqElmjt37SY8vDLVq1XFz8+Pzh06sHzlzyUd6Yzckjd+o9C1TQsAGtasRlpGJumZWQCk\nHM4gLCSYqMph+Pn50baZYfmG3wkKDGDaE4OJjQw/06FVEcrb9+7VN8b8F6gNvAKMApqJSLq9ytqI\nVVP1CVARqwILAGNMP+BxYBeQRUFlVqgxZhZwMfCRiDxjNxgPAQKxaq2O2L9uAyYDlY0xm0Vkekm9\n0YCAAAICCv/jTUxKJioyIv9+VFQkO3ftLqkoxeKWvEmH0mjaoHZBlrBQEg+lERpciajKoWRkZbN
9\n7wFqxkSzctNm2jRpRIC/PwH+/j7JWxaUtyHVGGslUxlYB5wo5Dn9gI0iMtQYcxvQx24gfg6r4DMF\nWOX1/CbAhVir0m2Ad7XuncAUEZmZVzwKvIg1GEtsQJ01t9WaOShvrlfrnsfjYdzg/oycOovQ4ErU\njI32YbKyo7yd7v0oIsdEJBlIAwr7r6gJkNfouNT+PRo4LCIHROQY8JPX81eLSKaIpHNy7TvA58CT\nxpgxwAER+f18vZG/Ija2CklJyfn39ycmFnqa5RROyhsbGU7SobT8+wdSUk86jYtr0ohZTz/M1McH\nERZciRoxUb6IWaaUtyF16l/BiV63A+3fPUCOfduvkMe8Hwc4XtSLichioA3wO/CuMaZUKtb/TK0a\nNUjPyGDXnj0cP36cJd//SId2l/k6VpGclLdDi4tYuGItAJu2/UFsZDghlSrmb7933GSSUw+TmX2E\nJas20L7ZhT7JWZaUt9O9dsYYfyAKCAEOAdWNMVuBtsAaQLBO6z4B8oZKMhBujIkAMoAOWFXqZ2SM\nGQJ8JSLv26eMLYEkSuHrvvHX3xj/8mvs3rOXgIAAFn7zLVd27kStmjXofmUXRg9/nEeeGAnAtVd1\np37duiUdqUzkbWka0LR+bfo8OQE/Pw9PDryNT5cuJzS4Et3jLqFn1w7c/dzreDwe7v17DyIrh7Jp\n6x+MnzmP3YnJBAT4s3DFWiY+cg8RoSE+eQ9uU25q1o0xA4CrgArABcALQCXgEazBlAx8D3wGfIq1\ncvoR6C8i9Y0xA4EHge1YF84X2LeHiEhP+zWSRKSK14XzWsBYIBXrwvmdQAzwNfCSiBT5kQitWS9Z\n9S6wPg6w9atZPk5ydvxadit3NevlZki5jQ6pkqVDyj3K2zUppZTL6JBSSjmaDimllKPpkFJKOZoO\nKaWUo+mQUko5mg4ppZSj6ZBSSjlaefu2GFVScnPhSJavUxSf/WNhPLUa+TiI+jO6klJKOZoOKaWU\no+mQUko5mg4ppZSj6ZBSSjmaDimllKPpkFJKOZoOKaWUo+mHOcuwzVsSGDz0UQb07UO/3reetG1Z\n/EpenjQFfz8/Lu/YgfvvvcsnGZ97dRLrNm7C4/EwfOgDtGhyUf62b77/kTf+/R5BgUH8rfuV9Ot1\nMxmZmTz+9HOkHj7MsWPHuP+uAXRqG1eqmcdNfIO1m37H4/Ew4sFBNL+ooJl48Q/LeOPdDwgKDOTa\nblfQ75Yb+fg/8/l8weL852ySzaz++otSzexmOqS8GGOuBuqLyBtnud8lwE0i8lQR27djl5D+5ZDF\nVJza8renTKRqbAz97v4nV3XtwgUNG5RWPABWrl7Ljp27mPvWGyRs287wZ8cz9y3rS5+Tk8OYCa/y\n6btvERFemXuG/h/dOnfkm+9+pH7dOjwy+F72JyZxx5ChLJg7s/Qyr1nP9l17mDvtNRK2/8HwcS8x\nd9prBZlfmcS8t6dYmR8dQbdO7el53TX0vO6a/P0XfPtdqeUtC/R0z4uILDjbAWXvt7aoAeUrbqgt\nX/7LKrpd3hGAhvXrkXo4nfSMDABSDqVSOSyUqMgIq7K8dSuWrVxFZEQ4h1JTAUg7fJjI8NKtLo9f\ntYZundpbmevVIe3w4YLMqamEhRZkbndpS5b9suak/afMmMWgAX1LNbPb6UrKi90o0wyrifh2rMaY\nz4A3gOUicrExpgawE6gmIonGmHXA/wH3iEhPY8xErEosf+ANEZlhH36IMeZarK/5VSJyuCTfixtq\ny5OSD9L0woJTpaiIcBKTDxIaEkJUZAQZGVls/2MXNWtUY8WqNcS1asm9/W9n3lcL6N7zdtLSDjPt\n5edLNXNi8kGamoLv94uKiCAxOcXKHBFBRmYW23fupmb1qqxYvZa4lhfnP3fDb0K12BhiorUw9Gzo\nkDpdfeBSoKN9/yfgIyDN7t3rgFV91dYYE4/Vo3cEwBgTBfx
NRBoaYwKBAV7H3SgizxtjZgNdsYaf\nMzikMcg7hsfj4flRwxj+7POEhYZSq0Z1IJfP5y+iRtVY3n71RX7/3xaGP/sC82b4rrHeu23J4/Hw\n/IjHGDHuJUJDQ6hVvdpJ2z/6cj43XdPDFzFdTU/3TtcKaAQssX+FAfWAH4DLsIbUa0A7oD2Qf4FB\nRA4Cm40xnwO3Ae95HfdH+/fdQOmeo5zCKbXlsVWqkJR8MP/+gaQkYqKj8+/HtbqED6ZNYtpL1qCq\nWb0aq9dvoONl1oXyCxtdwIGkJE6cOFGKmaNJPClzMjFVClZGcS1b8P6Ul5n2whhCQ0OoWb1q/raV\na9bTsnmTUstaVuiQOl0OVuvwFfav5iLyPbAUq+W4EfAF0BRrYC3x3llErgGeBi4BvvTa5F3H7tPu\nNKfUlne4rA0Lv10KwKbfNxNbpQqhIcH52+9+6DGSD6aQmZXFkh+X0a7NpdStVYt1m34FYPfefYRU\nqoS/v3/pZY67lEVLf7Ayy/+IrRJNaHBB5nseGU5yipV56U/xtG/dCoD9SckEV6pIUGBgqWUtK/R0\n73TfAV2MMcFYTcWvAk8Ay4DHgDQRyTHG5GKtukZiragwxtQDbhCRicBqY8wqH+QH3FFb3qpFM5pe\naOh9z2A8Hj+eeuwh5v1nPmGhIXS/4nJuvfF6Bj74KB4P3Nu/L1EREdx20/UMf3Y8/Qb9i+PHTzD6\n8UdKN3PzpjQ1jel930P4eTyMengI8/67iLCQYLp37kivG67lrqHDrJr1f/QmMsJaNCcmJRPtdR1Q\nFZ82GHvxunC+FRgInAA+E5Fx9vZlwBf2taWxQJyI9DDGXIFVq3471ileHazrVB+LyGTvjyAYYyZg\nXZ+acaYsrmswzs1l+7rS/xfCc1Xv4tYAbFv1k4+TnB1PTN1y12CsQ8qLMeZeoIGIPOHrLDqkSpYO\nKffQa1I2Y0w74HHgG19nUUoV0GtSNhFZDjT0dQ6l1Ml0JaWUcjQdUkopR9MhpZRyNB1SSilH0yGl\nlHI0HVJKKUfTjyCo88PjgQqVfJ2i+I5kA5Czyl0fi/O/2jc/QdWXdCWllHI0HVJKKUfTIaWUcjQd\nUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTD3OWYW6oWffmlrzPz1vMuh178QDDbu5K87rV87d9\n8MNqvvzlV/w9HprWqcawm7syddFylst2AHJyc0lKy2D+yHt8E96FdCX1J4wxt5zl8wcYY246w/ae\nfz3VnytOzfrrE8Yze8Zb/BQfz5aEraURq0huyfvzlj/YkZjC7KH9GNPnGp6btzh/W3r2Ed75diUz\n/3U7sx7qS8K+ZNZt38N9Pdrx7gN9ePeBPtzStgU927XwSXa30iF1Bnb7S5+z2UdEZojIp0UcLwh4\n+DxE+1NuqFn35pa88Zt30LWF1WDcsFo0aZnZpGcfASDQ359Af38yjxzl+Ikcso8eIzy4Yv6+x0/k\nMOfHNfTt1Mon2d1KT/fObDIQZ4wZBXSxHwsE7hCRBGPMrVhD5ziwSkQeNMaMxmo1ngl8CFSwf90P\n3AU0N8ZMEZHBJRncDTXr3tySNyktgya1q+XfjwwNJiktg9CKFagQGMDgq9vTY8x0KgYGcE3LC6kX\nW1Ac+vX6zXS4sD4Vg7R772zoSurMXsTq4ZsPPCMiXYB3gMHGmFDgOaCbiHQEGhhjunjt2xXYJSJX\nAH2BWPt4UtID6qy5rTHISXm9sqRnH2H61/HMH3E3i0b9kw079vL77gP52+fFr+fmy5r7IqWr6ZAq\nnn3Av4wx3wNDgWigMfA/EUm3n7MUaOm1z3KgnTFmKnCBiCwoxbxn5JSa9eJyUt6Y8FCS0jLy7x9I\nSyemcggACfuSqR0dQWRoMEEB/rRqWItNO/cBkHnkKPsOHaZmdLhPcruZDqnieQZYKCKXY1WoA+Ry\ncl16EFZFOwAishe4GJg
HDLJPGR3BKTXrxeWkvB0urM+itQLArzv3EVs5lJCKFQCoGRVOwv5kso8e\nA2DTH/uoGxMJgOxJpEFstE8yu51ekzqzHKyvURUgwRjjAW4E/IHNQCNjTJiIHAY6A2OBbgDGmG5A\noIjMN8b8CkwBZlBKX3M31Ky7MW/L+jVpWrsat78yCz+Ph5G9uvPpig2EVaxAt4sbM/DKOAZMmkOA\nnx+X1K9J64a1AUhMTScqLNgnmd1OG4zPwBgTA6wCkoFKwHbgdWA6cCcQCjyCNcx+FJFhXhfO/wPM\nwrqongM8BcQD64BNItLrTK/tugZjwC15AerVtyoWE94d7+MkZ8f/6rvKXYOxrqTOQEQSgTqFbKrp\ndXveKfuM9rrbsZB9m/z1ZEqVH3pNSinlaDqklFKOpkNKKeVoOqSUUo6mQ0op5Wg6pJRSjqZDSinl\naDqklFKOph/mVOVTkPVznvxadvmTJypf05WUUsrRdEgppRxNh5RSytF0SCmlHE2HlFLK0XRIKaUc\nTYeUUsrRdEgppRxNh1QZtnlLAt2uv4lZcz48bduy+JX07DeA2/oPZPL0t32Q7nRuyTvu9WncNmgo\nvQc9zIbf5KRti39YTs97/8Xt9z/CrE++yH/8y0XfcuOdg7n57gdYunxlaUd2tXI7pIwxVxhjPv6T\n5xSrMt0Yc7UxZtD5zvhXuKW2PI9b8q5cu57tu/Yw941XePbxhxg7cWr+tpycHMa8OoXpLzzDrNdf\nZMmyFew7kEhKahqTZnzA+5MnMPX5p1n843KfZHercjukiqO4lekiskBE3ijVcH/CLbXledySN37V\nWrp1agdAw3p1SDucTnqG1cOXkppGWGgoURER+Pn50e7SS1i2ai3Lf1lD+9aXEBocTGyVKMY89qBP\nsrtVufnePWNMIPAuUBfIxmoiDjXGzMLqx/tIRJ4xxiwFNtq7JVGMynRgJdBMRB41xrwMxAEVgaki\n8pYxZgawB7gUq9ihr4isLsn365ba8jxuyZt4MIWmjRsVZIkIJ/FgCqEhIURFhJORlcn2nbupWb0q\nK9asJ+4Sq0knK/sIg54YTVp6OkPu7Eu7S1sW9RLqFOVpJXUHsE9EOgBvApWxmlvuBdoBD3g9d6OI\nDPG6X6zKdGNMRWC7XbveCatUNE8FEbkKeA3of57f21/jtlozB+X1roTzeDw8P/wRRox/hSEjxlCr\nelVyc624h9LSeH3sk4wb9jDDx72CVskVX3kaUq2AnwBEZA7wG7BaRDLtqnTvPrNTr2wWqzJdRLKB\nKGPMMmA+EOO1+Qf7912AT7u2nVRbXhxOyhsbHU3iwZT8+weSDhITHZV/P+6SFrw/aQLTxj9NaEgI\nNavHEh0VQctmTQgI8KdOzRqEBFfi4KFUX8R3pfI0pE5w+vs9XsRzj3rfKW5lujGmM3Al0NledR0p\n4rV8WvDopNry4nBS3g5tWrHoux8B2CRbiK0SRWhwQTPxPY89SXLKITKzslm6bAXtL21JxzatWLF6\nHTk5OaSkppGZlU1keGWf5HejcnNNCvgZa4B8ZIy5DmhR3B3PojK9CrBTRI4ZY24A/O0L7KXOLbXl\nbsvbqnkTmja+gN6DHsbPz8Ooofczb/7XhIUE0/3yDvS67mruemQEHg/c2/dWIiOsRXOPzh257b6h\nAIx8cBB+fuVpffDXlJuadXtYvIV14fwY8G/gJhHJ+xhBkohUsS+cDxGRjcWtTAe+ApoBY4CvgSzg\nM6A9kAb4Ax+LyH/sAdlTRAacKa/WrJeseqYpANt++d7HSc6Op2qDclezXm6GlNvokCpZOqTcQ9ec\nSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH0yGllHI0/TCnUsrRdCWllHI0HVJK\nKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH+3/FKGgzACmLwQAAAABJRU5ErkJggg==\n", 
"text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "metadata": { "id": "DRL6XhixwueM", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "" ], "execution_count": 0, "outputs": [] } ] } ================================================ FILE: experiments/tf_trainer/tf_hub_tfjs/notebook/EvaluatingClassifier.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "EvaluatingClassifier.ipynb", "version": "0.3.2", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "metadata": { "id": "DnVolqQO5UMn", "colab_type": "code", "outputId": "4e8cb139-8ed2-4b08-e282-57465b9aa39e", "colab": { "base_uri": "https://localhost:8080/", "height": 53 } }, "cell_type": "code", "source": [ "!pip3 install --quiet \"tensorflow>=1.11\"\n", "!pip3 install --quiet sentencepiece" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "\u001b[K 100% |████████████████████████████████| 3.2MB 10.3MB/s \n", "\u001b[K 100% |████████████████████████████████| 1.0MB 19.5MB/s \n", "\u001b[?25h" ], "name": "stdout" } ] }, { "metadata": { "id": "nworUNj67VL5", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "import os\n", "import pandas as pd\n", "import tensorflow as tf\n", "import matplotlib.pyplot as plt\n", "from sklearn import metrics\n", "import sentencepiece\n", "import zipfile\n", "from google.colab import auth\n", "from google.colab import files\n", "from IPython.display import HTML, display" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "koTqnJ5t7vR5", "colab_type": "text" }, "cell_type": "markdown", "source": [ "Use Kaggle's My Account page to download a kaggle.json file and re-upload it here."
] }, { "metadata": { "id": "zuJpXuS07hrD", "colab_type": "code", "outputId": "b60430c3-dd36-44e4-f054-be4befe8998a", "colab": { "resources": { "http://localhost:8080/nbextensions/google.colab/files.js": { "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYW
xsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbn
RMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQuYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKT
sKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJlYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=", "ok": true, "headers": [ [ "content-type", "application/javascript" ] ], "status": 200, "status_text": "" } }, "base_uri": "https://localhost:8080/", "height": 76 } }, "cell_type": "code", "source": [ "!mkdir -p /root/.kaggle\n", "token_file = \"/root/.kaggle/kaggle.json\"\n", "uploaded = files.upload()\n", "with open(token_file, \"wb\") as f:\n", " f.write(uploaded[\"kaggle.json\"])\n", " os.chmod(token_file, 600)" ], "execution_count": 0, "outputs": [ 
{ "output_type": "display_data", "data": { "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "Saving kaggle.json to kaggle.json\n" ], "name": "stdout" } ] }, { "metadata": { "id": "pF9BCpwc76_b", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "import kaggle" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "5IMnm-_f91DV", "colab_type": "text" }, "cell_type": "markdown", "source": [ "Download the test set and extract the labeled portion" ] }, { "metadata": { "id": "exMy3FQp8xg8", "colab_type": "code", "outputId": "96b64e4c-c76d-4db8-b527-84bf334c66cd", "colab": { "base_uri": "https://localhost:8080/", "height": 71 } }, "cell_type": "code", "source": [ "kaggle.api.competition_download_file('jigsaw-toxic-comment-classification-challenge', 'test.csv')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ " 21%|██▏ | 5.00M/23.4M [00:00<00:00, 29.0MB/s]" ], "name": "stderr" }, { "output_type": "stream", "text": [ "Downloading test.csv.zip to /content\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "100%|██████████| 23.4M/23.4M [00:00<00:00, 53.3MB/s]" ], "name": "stderr" }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "\n" ], "name": "stderr" } ] }, { "metadata": { "id": "GFjhB3WO9RuC", "colab_type": "code", "outputId": "97272c2f-fb02-4d61-a184-74778d81097d", "colab": { "base_uri": "https://localhost:8080/", "height": 71 } }, "cell_type": "code", "source": [ "kaggle.api.competition_download_file('jigsaw-toxic-comment-classification-challenge', 'test_labels.csv')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "100%|██████████| 1.46M/1.46M [00:00<00:00, 119MB/s]" ], "name": "stderr" }, { 
"output_type": "stream", "text": [ "Downloading test_labels.csv.zip to /content\n", "\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "\n" ], "name": "stderr" } ] }, { "metadata": { "id": "4Grw9zJt9Udw", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "test_labels = pd.read_csv('test_labels.csv.zip', index_col='id')\n", "testset = test_labels.loc[test_labels['toxic'] != -1].join(\n", " pd.read_csv('test.csv.zip', index_col='id'))" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "neCTJdjJ-hKn", "colab_type": "text" }, "cell_type": "markdown", "source": [ "Load the pre-trained toxicity model from Google Cloud Storage" ] }, { "metadata": { "id": "DYVE2PB99XZx", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "auth.authenticate_user()" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "D9gQqslA-RKJ", "colab_type": "code", "outputId": "069cf772-656f-4696-e961-ec1d67902b9c", "colab": { "base_uri": "https://localhost:8080/", "height": 89 } }, "cell_type": "code", "source": [ "!mkdir -p tfjs_model\n", "!gcloud storage cp --recursive gs://conversationai-public/public_models/tfjs/v1/* tfjs_model" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Copying gs://conversationai-public/public_models/tfjs/v1/saved_model.pb...\n", "Copying gs://conversationai-public/public_models/tfjs/v1/assets/universal_encoder_8k_spm.model...\n", "Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.data-00000-of-00001...\n", "Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.index...\n" ], "name": "stdout" } ] }, { "metadata": { "id": "ZwYHVxgE_BIS", "colab_type": "code", "outputId": "9bc8b76a-2371-4291-b086-99d20ccf51de", "colab": { "base_uri": "https://localhost:8080/", "height": 289 } }, "cell_type": "code", "source": [ "predict_fn = tf.contrib.predictor.from_saved_model(\n", " 'tfjs_model', signature_def_key='predict')\n" ], 
"execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "\n", "WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", "For more information, please see:\n", " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", " * https://github.com/tensorflow/addons\n", "If you depend on functionality not listed there, please file an issue.\n", "\n", "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\n", "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use standard file APIs to check for files with this prefix.\n", "INFO:tensorflow:Restoring parameters from tfjs_model/variables/variables\n" ], "name": "stdout" } ] }, { "metadata": { "id": "CrVX18LN__4r", "colab_type": "text" }, "cell_type": "markdown", "source": [ "Load sentence piece model and preprocess test data" ] }, { "metadata": { "id": "bMjJEb25_59p", "colab_type": "text" }, "cell_type": "markdown", "source": [ "" ] }, { "metadata": { "id": "5IYO0GF2_fEf", "colab_type": "code", "outputId": "7c81d410-f695-4bbc-daff-d4a10f23ace9", "colab": { "base_uri": "https://localhost:8080/", "height": 35 } }, "cell_type": "code", "source": [ "sp = sentencepiece.SentencePieceProcessor()\n", "sp.Load('tfjs_model/assets/universal_encoder_8k_spm.model')" ], "execution_count": 0, "outputs": [ { "output_type": 
"execute_result", "data": { "text/plain": [ "True" ] }, "metadata": { "tags": [] }, "execution_count": 36 } ] }, { "metadata": { "id": "x2votZMZAnnG", "colab_type": "text" }, "cell_type": "markdown", "source": [ "Score the sentences with toxicity model" ] }, { "metadata": { "id": "uU3xQGiKA993", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "def progress(value, max=100):\n", " return HTML(\"\"\"\n", " \n", " {value}\n", " \n", " \"\"\".format(value=value, max=max))" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "Xs3Glf93Bp6O", "colab_type": "code", "outputId": "071890ba-b1b0-4e17-fd5a-6af93eaffb20", "colab": { "base_uri": "https://localhost:8080/", "height": 34 } }, "cell_type": "code", "source": [ "tox_scores = []\n", "nrows = testset.shape[0]\n", "out = display(progress(0, nrows), display_id=True)\n", "for offset in range(0, nrows):\n", " out.update(progress(offset, nrows))\n", " values = sp.EncodeAsIds(testset['comment_text'][offset])\n", " tox_scores.append(predict_fn({\n", " 'values': values,\n", " 'indices': [(0, i) for i in range(len(values))],\n", " 'dense_shape': [1, len(values)]})['toxicity/probabilities'][0,1])" ], "execution_count": 0, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", " 63977\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] } } ] }, { "metadata": { "id": "nXLm_GNWbuqP", "colab_type": "code", "outputId": "10537a68-d4be-4a10-9ab1-284489974cc7", "colab": { "base_uri": "https://localhost:8080/", "height": 71 } }, "cell_type": "code", "source": [ "!gcloud storage cp gs://conversationai-public/public_models/tfjs/perspectiveapi.csv ." ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Copying gs://conversationai-public/public_models/tfjs/perspectiveapi.csv...\n", "/ [0 files][ 0.0 B/ 2.0 MiB] \r/ [1 files][ 2.0 MiB/ 2.0 MiB] \r\n", "Operation completed over 1 objects/2.0 MiB. 
\n" ], "name": "stdout" } ] }, { "metadata": { "id": "9-x3fQEjb2-X", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "perspective_api=pd.read_csv('perspectiveapi.csv')" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "dGdFOCzbzDTJ", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "top_kernel = kaggle.api.kernels_output(kernel='tunguz/superblend', path='.')" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "ydkfpaSV4GCK", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "top_kernel_scores = testset = test_labels.loc[test_labels['toxic'] != -1].join(\n", " pd.read_csv('superblend.csv', index_col='id'), rsuffix='_predicted')" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "nkeZUDG_31c4", "colab_type": "code", "outputId": "30e2889b-15b0-4aea-bf2b-86794434776b", "colab": { "base_uri": "https://localhost:8080/", "height": 376 } }, "cell_type": "code", "source": [ "plt.figure()\n", "\n", "fpr, tpr, _ = metrics.roc_curve(testset['toxic'], tox_scores)\n", "plt.plot(fpr, tpr, label='Tensorflow JS model')\n", "\n", "fpr, tpr, _ = metrics.roc_curve(testset['toxic'],\n", " perspective_api['PerspectiveAPI'].values)\n", "plt.plot(fpr, tpr, label='Perspective API')\n", "\n", "\n", "fpr, tpr, _ = metrics.roc_curve(top_kernel_scores['toxic'],\n", " top_kernel_scores['toxic_predicted'])\n", "plt.plot(fpr, tpr, label='Top scoring Kaggle kernel')\n", "\n", "plt.xlabel('False positive rate')\n", "plt.ylabel('True positive rate')\n", "plt.legend(loc='lower right')\n", "plt.ylim(0.75, 1.0)\n", "plt.xlim(0.0, 0.25)\n", "plt.title('Performance on Kaggle Toxic Comments Challenge Test Set')\n", "plt.show()\n" ], "execution_count": 0, "outputs": [ { "output_type": "display_data", "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAf8AAAFnCAYAAACoxECQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xd4FNX6wPHv7qZX0oHQO4TeqyAE\nRIqoiAZFuKKCiCAoKqIgVUQRUAT7Va7KFVCwwAVFqUr7gVgoUlMJ6T2bZNv5/REZWEMIYDb1/TwP\nzzNzzpR3J8O+O2dmztEppRRCCCGEqDb05R2AEEIIIcqWJH8hhBCimpHkL4QQQlQzkvyFEEKIakaS\nvxBCCFHNSPIXQgghqhmn8g5A3LjmzZtTr149DAYDSim8vLyYMWMGPXr0uKHtZGdnM2bMGPLy8li3\nbh1+fn4Oirjy6d+/P6+++iqdO3cG4NChQzz99NN8/PHHNG7cuFT3FRcXx6BBgzhx4sR1r3P33Xdj\nNBqxWCzExsbSsGFDABo3bsyqVatuOIY1a9aQlZXFlClTrnudrKwsli9fzs8//4xOp8PJyYlRo0Yx\nbtw4dDrdDcdQ1o4ePYqnpyfNmjW7ofU2btzImjVrKCgowGQy0alTJ5599lmCgoLYsGED27Zt48MP\nP7zu7VksFsLCwvjpp5/YtWvXDa9fmv7880+mTZsGQE5ODrm5uYSEhAAwcuRIHn300Rve5pkzZ8jO\nzqZjx45F6lJSUli0aJF27js7OzN+/Hjuvvvua27TaDTy448/Mnz48BuORxSS5F9JffLJJ9SsWROA\nI0eOMGnSJLZt24a/v/91b+PUqVNkZGSwe/duR4VZJZw5c4ann36alStXlnriv1kbN24ELv9w2LZt\n2z/a3rhx425oeavVysMPP0zLli3ZvHkzLi4uXLx4kcmTJ5OVlcXUqVP/UTxl4YsvvqBnz543lPw/\n+eQTPv30U95++20aNWqEyWRi1apVjBkzhm+//daB0ZaNFi1aaOfSzfyQuZpt27bh4eFx1eQ/Z84c\nmjdvzrJly9DpdJw7d46IiAhatGhBq1atit3mH3/8webNmyX5/wOS/KuATp06Ua9ePY4ePcqAAQP4\n4YcfeOONNzAajdSvX5+lS5fi7+/PypUrSUxM5M8//6Rfv3588cUXpKamMnjwYNauXcuZM2d45ZVX\nyMvLw9vbmzlz5tCmTRs2btzIjh07yM7OJiwsjL59+7Js2TLatm3Ljh078PX15aWXXmLp0qWcP3+e\n++67T/vyX7VqFd988w1Wq5XGjRvz2muv4ePjw8qVK0lPT9fi8fPzY/Xq1QQHBxMbG8vMmTNJSkrC\nx8eH+fPnExYWRkJCAnPnziUyMhKAWbNm0bdv3yLHIz4+ntmzZxMXF4ezszOPPPIId955J3FxcURE\nRDBhwgQ2bNhARkYGzz//PEOGDCn22CYmJjJp0iQWLFhA+/bttfIff/yRFStWYDKZ8PT0ZNGiRbRs\n2RKbzcaiRYvYtm0b9erV49Zbb2Xv3r188sknxMXF8cQTT5CVlUXv3r1JTEzktttuo2vXrtp2lVKs\nWrWKb7/9FpPJxIABA3j++ecxGAw3dE7s37+fJUuWkJ+fj4+PD3PnzqVVq1Y8+uij9OnTh7Fjx5KZ\nmcnQoUP597//zZYtW0hPT2f+/PlER0czc+ZMUlJS8PX1ZcGCBbRs2dJu+zt37iQtLY2XXnpJi61W\nrVosW7aMnJwcoPCHyezZs4mPj8fZ2ZkJEyZwxx13EB0dzdixY7n//vu1HzFLlixh1apV2rm5YMGC\n614O4Pvvv2flypUYjUYaNmzI0qVLqVGjBsuXLyc3N5f4+HhOnTpFQEAAq1evZtu2bWzevJk9e/aQ\nkpLCoEGDeO6550hJScFkMjF8+HCefPJJu89ssVhYtWoVy5cvp
1GjRgC4uLgwbdo02rRpY7fs3Llz\n+fnnn3F1dWXFihU0adKEpKQkZs6cSXx8PGazmXHjxjF27Nhi/4aZmZnMnz+fY8eOYbFYmDJlCnfe\neafWUvDqq6/y0UcfkZKSwsSJE3nwwQex2WzMnz+f7du3U79+ffr27cv+/fv5+OOPi93ejfr000/5\n9NNPMZlMdO7cmYULF+Li4sK+fftYsmQJZrMZgKeeegqLxcJHH32Eq6srGRkZPP3003bbOn36NPfe\ne6/WUtS4cWO2bNlCYGAgAAcPHmTJkiVkZ2cTEBDAsmXLtG0bjUbGjRvHmjVrbvgzCECJSqdZs2bq\n4sWLdmUjRoxQe/bsUTExMapDhw7q1KlTSiml3nnnHTVlyhSllFJvvvmm6t27t0pNTVVKKXXgwAEV\nHh6ulFIqJydHdevWTR0+fFgppdS2bdvUoEGDlNVqVV9++aVq3769ioyM1NYLCwtTBw4cUDabTY0c\nOVLdfffdymg0qlOnTqlWrVqp/Px89ccff6gePXqo7OxsZbVa1b/+9S+1atUqLZYePXqouLg4ZbPZ\n1IQJE9Tq1auVUkqNGzdOffbZZ0oppbZv366GDBmilFJq7Nixavny5UoppaKiolTXrl1VWlpakeMz\nfvx49c477yillIqLi1OdOnVSsbGxKjY2VrVq1Up98sknSiml/ve//6mBAwde9RjfeuutateuXeqO\nO+5Q69evt6szm82qc+fO6ujRo0oppVauXKnGjRunlFJqx44dKjw8XOXk5Kj09HQ1ePBgNWbMGKWU\nUlOmTFGvvvqq9rlat26tvvzySxUbG6tatmyplFJq06ZNaujQoSorK0uZzWY1YcIELd6ruXLdS7Kz\ns1XXrl3Vr7/+qpRSasuWLWrw4MHKZrOp2NhYdcstt6i0tDQ1f/58tWzZMqWUUsuWLVOzZ89WSik1\nZswYtW7dOqWUUlu3blXDhw8vst+XX35ZzZkzp9i4lCr8O77//vtKKaViYmJUx44dVXx8vIqKilKt\nWrVS33zzjVJKqUmTJqn+/furtLQ0lZqaqsLCwlRcXNx1LxcZGak6dOigzp49q5RS6q233lLTpk3T\nPlevXr1UfHy8stlsavz48erdd99VSikVERGhNm/erJRSatGiRdr5l5ubq5588kmVnJxs93lOnjyp\n2rZte83PvH79etWhQwd14sQJpZRSs2fP1o7rnDlz1Lx585RSSkVGRqqwsDCVkJCgzGazatasmUpK\nSlLr169X48ePV0op9eyzz6rnn39eWa1WlZycrHr37q3Onj2rLX/pb/fLL7+odu3aKavVqrZv364G\nDRqkcnNzVVpamho0aJB2bha3vWt9lkuxXPLTTz+p3r17q5SUFGWz2dRzzz2nxTFs2DDtnDtz5ox6\n9tlnlVJKTZs2TX3wwQdX3cf8+fNVz5491XvvvadOnjypbDabVpeenq46deqkDh06pJRS6osvvlAR\nERFKKaXWrl2rJkyYcM2/hbg2eeCvCti9ezcpKSl07NiRPXv20LVrV60pMyIigh07dmC1WgFo167d\nVW8N/P7779SsWZNOnToBcNttt5Gens6FCxcAaNCgAQ0aNNCW9/HxoVu3buh0Opo2bUrXrl1xd3en\nadOmWK1W0tLSaN26Nbt27cLLywu9Xk+HDh2IjY3VttG5c2dCQ0PR6XS0bNmSixcvUlBQwMGDBxk2\nbBgAAwYMYP369RiNRg4ePMi//vUvAOrXr0+nTp2K3LIwm83s27eP+++/H4DQ0FC6devGgQMHgMKr\nt0v3E8PCwoiPjy/2uM6bN4+MjAxSU1Ptyp2cnNi3b5/WEtC5c2ftcx0+fJh+/frh6elJjRo1GDp0\nqLbe4cOHtc8VHh5OcHBwkX3u3LmTkSNH4u3trd1D//7774uN8WqOHj1KnTp1aNeuHQC33347SUlJ\nXLx4kTp16jBu3DieeeYZ9
u3bx+OPP263rtFo5P/+7/+0uAcNGsTnn39eZB+ZmZna1dnVFBQUcODA\nAUaPHg1A3bp16dKlCwcPHgQKbxvcdtttADRr1oy2bdvi5+eHv78/AQEBJCUlXfdye/bsoWfPntot\nmdGjR/PDDz+g/uq5vGvXrtSqVQudTkerVq24ePFikXgDAgLYs2cPR44c0a7W//75MjMzCQgIuNah\n1+K81FLSqlUrEhISgMLWgFmzZgGF/5/8/Py0/19Xs3PnTsaOHYterycwMJCBAweyfft2rX7EiBFA\n4Xmcl5dHRkYGhw8fpn///nh4eODn52fXqlXS9q7Hzp07GT58OAEBAeh0OiIiIrRt+Pv78+WXXxIZ\nGUmTJk1YsmRJidubNWsWkydPZteuXYwcOZI+ffrw3nvvoZTi0KFDNGjQgC5dugBw1113ceLEiSL/\nH8XNkWb/SurBBx/UHvgLDQ3l/fffx9PTk+zsbA4fPszgwYO1Zb28vMjIyADA19f3qttLS0vDx8fH\nrszb21v7j/b39Tw9PbVpvV6Ph4cHADqdDr1ej9VqJS8vj8WLF2tf+JmZmfTr189u+5cYDAasVisZ\nGRnYbDatTqfT4enpSWJiIkopIiIitHWMRiPdu3e3iysjIwOllN22fXx8SEtL0/ZzKVa9Xo/NZrvq\n8QB4+OGH6devH/fccw9t2rShV69eWt0nn3zCpk2bMJlMmEwmrdkyKytLe0AKsJvOysqyO45X1l2S\nnZ3Nhx9+yLp164DC5Hcjz3FA4d/yyv3odDrtb1m7dm3uueceli1bxmOPPYarq6vduhkZGej1eu3v\ne+Xf9kp+fn4kJiYWG0N6ejpOTk5254mPj492Pjk7O+Pi4gLY/00uzV/6sXo9y2VlZXHgwAG7c97T\n05PMzEyg8Py/5NK5+Xfjx48H4KWXXiIlJYUxY8bwxBNPFPnMSUlJ2Gw29Prir5uu/MxXfpZff/2V\n5cuXk5CQgF6vJy0t7ZrnX3Z2NlOmTNFuqxQUFNj9mLx0jl+KxWq1kpmZSf369bVlrjzHStre9cjK\nymLv3r3s2LEDKLxNdenzvfbaa6xevZoHH3wQLy8vnnnmGQYMGHDN7RkMBu6//37uv/9+cnNz2bFj\nBwsXLiQoKAir1cq5c+eK/F0l+ZcOSf6V1JUP/F0pODiYnj178uabb97Q9gICArQfCFD4n/rSlc75\n8+dvKsY1a9YQFRXFxo0b8fT0ZPny5ddMGFD4BavT6UhPT8ff3x+lFDExMdSuXRuDwcCXX35p9+V6\ntfX1ej2ZmZlaAszIyLiuK7a/a968OaGhoSxevJgZM2bw5ZdfUrt2bX755Rfef/99NmzYQJ06dfj5\n55+ZPXs2UJhojEajto3k5GRt2tPTs9i6S4KDg+nfvz9jxoy54XgvCQwMtPtb2mw2uyv1lStXMnLk\nSDZs2EBERITdFa6fn5+WUH18fLTjf2VCAejWrRsvvvgiBQUFdj8gIiMj2bNnD6NHj8ZisZCTk6Ml\n35v9O5QkJCSEPn36sHz58pvehrOzMxMnTmTixImcP3+eRx55hM6dO9v9uGzcuDG+vr7s3LmzSFJb\nuXIlDzzwwDX3MWPGDCZOnMioUaPQ6XT07NnzmssHBQXxzjvvFHnI1GKxFLvOtc6/4rZ3I4KDg7n3\n3nuZPn36Vevmzp3LSy+9xO7du5k+fbr2w/9qcnJyOHLkiPbcjqenJ8OHD+eXX37h9OnT9OjRgxYt\nWvDf//63yLpHjhy56c8gCkmzfxXTu3dvDh8+rDVD//777yxcuLDE9dq2bUtKSgpHjx4FYMuWLdSs\nWZM6dercdCypqak0atQIT09PLly4wO7du+2+mK7GxcWFXr16sWnTJgD27t3LhAkTcHZ2pm/fvloT\ndF5eHs8//3yRJlwnJyd69+6tXTnHxMRw+PDhEr9or6Vfv36MHDmSKVOmYDKZSEtLIyAggNq
1a5OX\nl8emTZswGo0opWjTpg27du0iPz+frKwstm7dqm2nbdu22vzOnTu1pu0rDRgwgK+//pq8vDwAPv/8\nc+1YXK927doRHx/P77//DsA333xDvXr1qFmzJseOHWPPnj3MmjWLBx54oMi54e7uTo8ePbQH7Hbt\n2sWkSZOK7OOWW26hfv36zJw5k9zcXKDwQcvp06djs9m0v+Olv0NUVBRHjx694ddRr0efPn04dOgQ\ncXFxQOFtj8WLF5e4npOTE1lZWUBh8/P+/fsBqFevHoGBgUVeVzQYDEybNo0FCxZw/PhxoPA209Kl\nS9m5c6dda9PVpKWlERYWhk6n44svvqCgoOCa/x8GDBigne9ms5mFCxdy8uTJa+6jbdu27Ny5k4KC\nAjIyMvjuu+/+0fauFtO2bdu0H5dbt27l448/pqCggAcffJDU1FR0Oh2tW7fGYDBor4BeOs5XUkrx\nzDPPsHnzZq0sMTGR/fv306VLFzp16kRsbKz2GmBkZCQzZ84ECv922dnZ2q0dcePkyr+KCQ4OZsGC\nBUyePBmz2Yynp6d2n/FaPDw8WLFiBQsWLMBoNOLv76+9fnOzIiIimDp1KrfddhvNmzdn5syZTJky\nhY8//via6y1atIgZM2awdu1afH19Wbp0KYB2VbFhwwYA7rjjDmrVqlVk/Xnz5vHiiy+yceNGnJ2d\nWbhwIbVq1dKSw82YPn06v/76K/Pnz2f27NmsXbuW8PBwQkJCmDVrFr/99htTp05l2bJl7Nq1i8GD\nB1O/fn1uv/12Lak888wzPP3002zZsoVbbrmF9u3bFzm+4eHhnDlzhrvuugsoTESLFi26oVi9vLxY\nsWIFc+fOxWg0EhAQwOuvv47NZmPOnDnMnDkTV1dX/vWvfzF8+PAiz00sXryYp59+mk8//RRfX19e\ne+21IvvQ6/W8++67LFu2jBEjRuDs7Iy7uztjx47Vnqm4dKw2bNiAs7MzixcvJiQkhOjo6Bv6PCWp\nWbMm8+bNY9KkSVgsFry8vHjhhRdKXG/gwIEsWbKEmJgYRo8ezdy5c8nNzUUpRXh4ON26dSuyzqhR\no3B1deX555+noKAAnU5H9+7d+fjjj3F2dr7m/p588kkee+wx/Pz8GD16NKNGjWLWrFl89tlnV11+\n+vTpzJs3T3vmoW/fvjRr1uyaCe+2226zO/8GDx6sXSUXt70b0aFDB+0NDKUUgYGBLFy4EFdXV0aM\nGMGYMWPQ6XQYDAbmz5+Ps7Mz/fv3Z9asWVy4cEH7vwyFty0++ugjli1bprVUurq68vDDD9O/f38A\nli9fzosvvkheXh4uLi489dRTQOFzHG+88Qb9+vWTV5Vvkk7JTychSpVSSkvqn332Gfv27dM63rmy\nbuTIkUyaNInw8PByi1VUPVeeY2vWrOHIkSM3fBtQVH3S7C9EKTp58iQDBgwgMzMTi8XC999/r70V\nsGTJEubNmwfAuXPnOH/+PK1bty7PcEUV88cffxAeHk52djZms5kffviBDh06lHdYogJyaPI/ffo0\n4eHhfPrpp0Xq9u3bxz333MN9991n1x3pyy+/zH333UdERIR2z1KIyqJly5bceeed3H333QwZMoSQ\nkBDt4b2HHnqIqKgoBg4cyOOPP86cOXOu+tCmEDerTZs2DBs2jDvvvJMhQ4YQGhqqvW4pxJUc1uxv\nNBqZOHEiDRo0oHnz5kWeXh4yZAgffvih9uU4f/580tLS+PDDD3n33Xc5d+4cs2bN0h4YEkIIIUTp\ncNiVv4uLC++///5VOzKJjY3F19eXWrVqodfrtS4o9+/fr93/bNy4MZmZmVpXoUIIIYQoHQ5L/k5O\nTri5uV21Ljk52a7jEn9/f5KTk0lJSbEbWe5SuRBCCCFKT4V+1e967khc+WSrEEIIURFY8/LIOvkn\n6opeHI3RMSiLBa7IWSar6Zo9PV5N0wduvhOwS8ol+Qc
HB5OSkqLNJyYmEhwcjLOzs115UlISQUFB\n19yWTqcjOTnbYbEKCArylmPsYHKMy4YcZ8erqsfYajRiuhiPNdv+sxlP/Yn+r1ZuZTJhPHEMm8mM\nOTHBYbFU2uRfp04dcnJyiIuLo2bNmuzcuZOlS5eSnp7OypUriYiI4Pjx4wQHB9v1yy2EEEKUBktW\nFqqgAACb2UxG1Gnyz54h49RxrJ7umG1mTFYTBp2BwJj0m97Pz+08UX9rnE7yt+8QqrZnCJ7ORcfQ\nKE6vkhcpkcOS/7Fjx1iyZAkXLlzAycmJ7777jv79+1OnTh0GDhzI3LlztbGdhwwZQsOGDWnYsCFh\nYWFERESg0+l46aWXHBWeEEKIcpJtyiHPkndD6yQZU8jPzkRvvGK9fBOGlDTUlQMtKcXF3ERcDC4k\nG1NBB275VpodiMbJbKPARY+rqfhmdncAMq5al+FlINXXwIVgF1wMzrgaXNHbFHmezhg9CxO60kGm\nnxtWg44MWy4hHsG0CwrDy8WLQLfCZ9oUEOQegLuTGy4GF/S6su9yp0r08FcVm5gqkqrajFeRyDEu\nG3KcHS8oyJuExAyS8lKIyorl05PrcTG4cOnit8BqKpxQCr8sq1autymC0yxY9faXya4mG51PGvE2\n3th98WvJ8HfDNd9Ctq8rud6Fo0Y65eSR17AWuDjj3LUTjWs0wkXvTLBHIKBD5+SE7q8fGQadHoPe\nUGrx3KigoGuPI3E9KvQDf0IIISqG9PwMIrNi+D35BB7OV3+TK99SwMGEIxisiuZR+dxyJIdRNQrT\njKvBRVvOKyMf14KiQytfD3OIP+bQy6+QG3KMFDStZ/cQnU3Z8HT2wM3JDTeDK+5O7iizGZ/evXFy\n95SHxJHkL4QQ1ZLFZiHHnEuOKZdscw6n0s4SmRXN2YxIdOjsEqRN2XAx22gRmY+LWeGRbqFmqhmf\nXBsK7O5pdwX0V7Qn104xg94AuvzLhVc83e7b91YtcVtzcnBv1Ai9m7tdrMpixiOsDS4hIaV5CKo1\nSf5CCFEF2JQNmypMqhabhZNpZ0jNTyPXbCQ3L7sw0ZtzyDHlkmPORRmN+OQWLu+XZcHZoqidbaVJ\nno2G8SbyalxOwL4pxQ89rAMMDepr887OBnQ2AwWxsfgPvh2fXr1x9g9wzIcWN02SvxBCVGLRWbF8\nfOK/JBkLX5N2siiC0s0EZlhoezqPepk317zukmJE7/7XDwA3N2z5hVfuwQ/+C52TE24NGuAcGITe\n1dVuPXmuonKQ5C+EEJWEUgpbbi55sdGcy4ji7K5vcS9Q3HehgAxvA/5ZxSd61+bN0RuctAfsUGDN\nM+LRomXhrNWKe6PGoNfhVq8BToGBcm+8CpPkL4QQFYjJauJ8ZnRhD6e/n0R/6Fd0ZyLJ9XHFM6tA\nW84daHPFev5ZVgzePlizszD4+ODTszc6JwOebTvg3qhRmX8OUbFJ8hdCiHJiNBvZd/H/+DPtDO4W\nHamnjxGaUEC9BBMhaRa7ZT2zCsh21+OdZyPRz4nYWi4EeAXTsmYrat8yCIOvr1ypi+smyV8IIcqI\nxWbheOopsk3ZfHHsC7r9kUvtZDODUi3FrmNsVJvUfm0x1KtLm4CWuBhcaOTkRh+9fH2LmydnjxBC\nOFjOudN8t2EZzhZFi6gCnPXw+NX6rNHp8OnbD9eatfHu1g2Duwc6J/maFqVPziohhHCAzMQ4Ivd9\nj8eWPYD9/XknG5j8vHE12Qi47Xa8u/fAycdXEr0oM3KmCSHETVBKkX/+HNmH/w+sVtDrSY89i+7U\neW2ZK4dqyXXTE/zQeAKC6uJat57cnxc3rMBkJSUzT7r3FUKIsmBOS8WSlkbW/n2g15O588erLvf3\ndB5dxx2/gFAsw/rTtk57PG5g5DZRtR07n0p6dkGx9cmZeWTkmEhINXL2QqZd3bevj/jH+5fkL4QQ\nVzAlJGDLz6PgwgW
yD+zDePJEscta9RAX7MKpBq6k/NWHfYvQNtzabjghHkE0K6ugRYWXmpnP8ag0\nbErxv/3RpGTml7zS34T4e2Czlc4AR5L8hRDVmrJaKYiNIfXbr8n97ddil0vzMRBT04VT9d3A1ZkM\nbyfydRbcndy4v8U9dAhqI035VZDFaiMpPY+YxGxy8swUmK2cu5CFl4dzkWWPnEoiyNe9SHlMUs5V\nt+3j6cKofo2L3bdS0KCWN17uztTwci12uZshyV8IUa2YU5K5+N47OIeEkL1/31WXOVfHhUwvAyg4\nW8+NlBAP+tftQ9+aHRnhVsNuhDpRdVisNqxWhdlqY8naX7iQnHvD24hJysHd1X64XzcXA/kmK0E1\n3OjcIpjQQE9CA72oX/Of37u/WZL8hRBVnlKKvD9PcmT2f8i/mABA/vlzWr0l2I+Luhx+6OZDlqde\nG2WuTWBLHms6gkB3/3KJW1ybTSmSM/JIzcznQnIuFpsNHTqUUvx2NgU/n6JDDyekGUnNzMfT3f7K\nPTGt+MGLmoT64uXuTICvG83q1sBmU9QL8cKgL9rSE+jrjv4q5RWNJH8hRJVlNRqJX7mCvDOni9St\nHexHvquePFc9Ficd4AfAnO7PEOIRVMaRiuIopcjNtxR2dwxsPxxLbGIONgV/nE8tYe3MYmty8sz4\nel5uwfHxdMGYb8HP24VaAZ4kZ+QxaURr6gR7lcbHqHAk+Qshqgyb2Yzx5HHyIyMxnjxB/tkzdvUX\ngpz5vocPWV72zbIt/Zvh4eTOmJajcJEm/XJnsdpYt+MsPx6Ju67lm9bxJayBPwq0pnSlFLUDPXFx\nMhRZ3tVZj4db0Xv21YkkfyFEpWc8eYLET9dgTky8av1/b/Mjyd8JdDq8Xbxo6VWb2xuE09C3Hnqd\nvoyjrd6M+WaM+RaOR6WRkGbUkvOJ6DSS0vPQ63Rk5pqKrNepWVDhu5QKQoM86d+pDga9Ds9qnsRv\nliR/IUSlZDObyD93jrilS4rUBYy4i1wnGx/k7SW1xuWvuQ9GvEqBDDVfZo6eSWb9jrME1ih8Av54\nZFqJ6/h4uhDi505qVj7jBregZ+ua8haFA0jyF0JUCkopzEmJGE+cIHXzN1gzM+zqbc4GPr+rNslO\nBbgZjpBvzQfXwq+4iOZ30Se0Bz5u3iRnS/a/UcZ8C1/sOkt0YjYuTgb+nouz88xcSM7F3dWAXqfD\npsBktmK1Fd6nT0zPs1u+TpAngb7u+Hm70qVFsLa9Gt6uhPhJR0hlQZK/EKJCyzq4n4T33y22/kxd\nV4608iAxwBko7DEt35pPqFctEnKTeLXPXNycSvcd6aomLSufvAILyZn5/BmdzsGTiYTUcOd0XCYu\nTnpMluvrWCavwErD2j5Y/lrxzRexAAAgAElEQVTealPk5plZ+Gg3rXnfYNChlyv5cifJXwhRISml\nOPvcU6i0dLvyY43dcDYrDvetS0iN2rQJbMUtNitBHgF4O3tR26smTjLc7TUdO5/KWxv/IMTfg9hi\nOqDJzCm8726y2GhQ05uohGweGdaSsIYBeF+lgxsAvU5HUJA3ycnSulLRyf8QIUSFoWw20iJPEfPu\nW3im2Xew8sb9wdRw9WVej+dw0jsxvJxirKiUUhw5lUxOnpmT0el4uF3+ek9MM5KQZiQjx/5Butik\nnEvP0NGyvh81Azwwma20qOcn99qrOEn+QohyZzObOPfkEyhTYXLyvKJuRxcv2g4fy9OewdT1Cq32\nV/X7jydw+M8kPN2cyckzcyau8NmH3HzLdW+jXrAXOr2OZ0d3wN21eh/P6kr+6kKIcmG2momMPUH8\n2o+pfd6+af94Y3eatL+FFoPu4TFD9X6Vy2K1kZVrYvev8Xy7L6rY5XS6wmb3to0D6NIimBpervh6\nXe6zwN3VqdT7hxeVlyR/IUSZi/5lLwWrPwSg9hXlH94ZwNgej3JXQIvyCawCiU7IZ
t7H/3fVun4d\nQhnUpS56vQ5ngx4/b0nq4sZI8hdClIlsUw7RZ3/FZdkHduX5IX5YO7emw10PU/SN/erldGwGB44n\nsOvXeLtyX08X/H3c6B4WwsDOdcspOlGVSPIXQjhMQm4ia058Tnx6HA9/lYKbSWl1+S46mixfiYdr\n1ew7/XoopTh4IpFth2Kw2RRxfxtFrkW9Gky+u430YidKnSR/IYRD5Cde5Pgbc7gryWxXbvL1xO3h\nB2nbqns5RVb2lFLs/i2e5Iw84pNzMVkK7+NfSLn6kLHPjO5A83o15H144TCS/IUQpSol+jQpry9H\nb8zjygZqJ39/aj02GfdGjcstNkdSSnEsMo2jZ1LsxnM3mWz8+Mu1B6jp3iqEe/o1xtPNGVeXogPR\nCFHaJPkLIW6aOSUZa04u6T9+T0bKBQxnogGwGypn6iM0bdOrSr8zbjJbeez13SUu1699bbqH1cTD\nzYmgv8Z9d3aSgYVE2ZPkL4S4IeaUZOJWvI45IcGu/Mrr1QwvA3nj7yasaQ8C3f3LNsAypJRi++E4\nPv/x8tDBLev7cWefhnZN9gaDjnoh3tKMLyoMSf5CiOtiNRqJXfIypgv2Tdhn67lhsNg438KfOs06\nMLTTqCo/TG5aVj7JGXksWXvUrvy5+zvQvJ5fOUUlxPWT5C+EuCpls5G5exdpWzdjSbMfitXk6cq2\nLh5E1nYmwM2fQfX78UitzjhX4d739h9PYO9v8fwZk1Gkrn/HUB4Y2KxK39oQVUvV/Z8qhLhptoIC\nzk6eWKTc6OfB/qYGjjVxp6ZnCOPq30qn4HYY9FX3ITWrzcYn351iz28X7cr9fVxpVrcGAzrWoXGo\nbzlFJ8TNkeQvhEApRd7pU6Rs/IKC2Bitj30A1bE1W1sbOGMpTH71vEN5tMEA2ga2qrLN+xarDZPZ\nxqE/E/nPtlNaeYCPGy+M7STd5IpKT5K/ENWYzWwm6ZM1ZO376ar13/TzI7J2EljA08mDh8Lup4V/\n0yrXvF1gtmKx2th3LIH1O85itakiy4zs24ihPRqUfXBCOIAkfyGqqZzffiV+5Qq7MreGjQh56GFM\n/j7M3LdAK6/rHcpTHR/HpQoNsmOx2ti09zxbD8QUu0yDmt60auDPXbc0xKCvmq0conqS5C9ENZMf\neZ6YRfPtygJH3YffoMGk5afz1P5X7Ooea/sv2gS2KssQHabAbOWVz37BarNx7kKWXV1ooCfBfu7k\nFVh48p520tmOqNIk+QtRTSiLhTOTHgV1uUnbyc+fhotfJT4/mbcPLSc+9/K7+01rNGJIw3Ca+TUp\nj3BLTUxiNrt/i8dqVez5Lb5I/ZDu9bmnX9XsdVCI4kjyF6IasJlNnJ00wa6s8Yq3MHh5YbFZePnQ\ncru6V3rPwdulcg+4Y8w38+n3pzlwIrFI3WMjwmjXOFCu7kW1JclfiCrKmpND7rHfydyzm7zTl59Y\nDx4zlhr9+mvz03a9oE3P6f4MIR5BZRqnI7z86RHOxmXalU0d2ZYGdWvg42qQnvZEtSfJX4gqJv2H\n7aRs3GD3ut4ltadOw6tte21+8/nvUBTeBohofnelT/xWm41HX91lV3Zrx1DG/NUBT1CQN8nJ2eUT\nnBAViCR/IaoIpRTnpk/BlpOjlend3fHt0xevTp1xb3z53n1CbhILDi7V5ut6h9IntHIPsbvoP4c5\nF3/5Ib5GtX14cWzncoxIiIpLkr8QlZiy2Qo75bFYiF28UCv3aBlG6LSn0BkK72n/nnycP05+gQ0b\nBy4eLrKd5zpPLbOYS1NegYW9v1+0G1gHYMrINnRoWrlbMYRwJEn+QlRSxXXBW/PRx/Dp1p34nARO\np59jw5mvr7q+t4sXc7rNwMPZw9Ghlrp9xy7yweaTRcofur0FfdrVLoeIhKhcJPkLUUmdf+5pbbpG\n/3B0Tk743tofl6BgLDYLiw4ts1u+lmcIdzQaT
E3PYALc/Ctlf/xKKZav/41jkZcHGqoV4EHHZkHc\n0ashzk7SEY8Q10OSvxCVjLJYiFm8ULu3X2/2XNzqN9Dqt0fv4qtz/9PmH249hpoewdT2qlnWoZYa\nm1Ks+/Es2w/H2pW/90w/nAyS8IW4UZL8hahkzj01FZvRCIB7s+Z2iT+zINsu8c/sMo263pW7Gfw/\n2/5k16/2nfMM7VGfkX2lYx4hbpYkfyEqkcT/fKQl/loTJuHdtZtW994f/+G35GPa/Kr+r5Z5fKVF\nKcWmvefZvC/arrxry2Am3hFW5QYWEqKsOTT5v/zyy/z222/odDpmzZpF27ZttboffviBt99+GxcX\nF4YOHcqYMWM4ePAgTz75JE2bNgWgWbNmzJ4925EhClFpZB08QOae3QC4t2ipJf60/HQWHHwdk/Xy\ne/0Le84qlxhLg9Vm46m3fibbaNbKOjUPYvJdbcoxKiGqFocl/0OHDhEdHc26des4d+4cs2bNYt26\ndQDYbDYWLFjApk2bqFGjBo8++ijh4eEAdO3alTfffNNRYQlR6RTEXyB921Zt2F3Ptu0InTodm7Kx\nI3Yvm85u0ZZtH9SGR9s8WF6h/mM//3GRD7dcfoq/brAXcx/qIlf6QpQyhyX//fv3awm9cePGZGZm\nkpOTg5eXF+np6fj4+ODv7w9A9+7d2bdvH6GhoY4KR4hKKeHfH2hJ/5LaTzyJ1WZl6q7n7cqf7DCR\nZn6V9z74lBV7yM23aPP/ur0FfdrWksQvhAM4LPmnpKQQFhamzfv7+5OcnIyXlxf+/v7k5uYSFRVF\naGgoBw8epGvXroSGhnL27Fkee+wxMjMzeeKJJ+jVq5ejQhSiwkr99mtSv95kV1ZrwiS8unRle/Qu\nvj6/VSvvVrMTQxsOIsDdr6zDLDXPvr1PS/xdWwYz4Y4w6X9fCAcqswf+1BXDiOp0Ol555RVmzZqF\nt7c3derUAaBBgwY88cQT3H777cTGxjJ27Fi+//57XFxcrrntoCBvh8Yu5BiXhUvHOG7jV3aJ38Xf\nny4fvQ/Ac9+/TGT65dfdZvSaSNc67amsElJzefTlH7T5/p3rMn10R4fuU85lx5NjXPE5LPkHBweT\nkpKizSclJREUdLm7za5du7J27VoAXn/9dUJDQwkJCWHIkCEA1KtXj8DAQBITE6lbt+419yUDdTiW\nDIbieEFB3iRdTMd48gQX1nyilTf74GMA/oyJYc7+xVp5fZ+6PNt5ClA5z3+rzcbqTcc4eubyd8SE\n4a3oHlbToZ9HzmXHk2PseKXx48phyb9Xr16sXLmSiIgIjh8/TnBwMF5el8cHf+SRR1iyZAnu7u7s\n3LmThx56iG+++Ybk5GQefvhhkpOTSU1NJSQkxFEhClFh5JyP5Mz0GXZlTd//CJuy8cWZb9kd97NW\n3r9uH0Y2HV7WIZaaF94/wMVUo13Ziim98fG8dgufEKL0OCz5d+zYkbCwMCIiItDpdLz00kts3LgR\nb29vBg4cyL333sv48ePR6XRMmDABf39/+vfvz4wZM/jxxx8xm83MnTu3xCZ/ISozZbOR+vUm0rZ8\nq5X53T4U/9uHkpafzpz9r9gtv6zvQlwNle//hMVqIy0rn5nvHrArv6NXA+7s06icohKi+tKpK2/G\nV1LSxORY0oznGMpq5czEh+3Kmqx+D72LS5Gn+R9seS/da1Wu4WltSnE2LpNXPvulSF3XlsE8NqJ1\nmcck57LjyTF2vArd7C+EuLaEf3+gTTea8DCGzr3Q6fWk5afzwbFPtbpXes/B28Xrapuo0J5Yvod8\nk1Wb93J3xt/HlVH9mhDW0L8cIxNCSPIXoowpi4WYVxZREBUJFA7BW2voQJKTszmS+Cv/PbWRPEs+\n7QLDGNNyVKUacvdUTDofbjlJSma+Vta+SSB39mlIvRB5AlyIikKSvxBl7MqBeVxq1sKnW3fyzPn8\n58Q6DiYcw
cXgwgMtRtGjVudK1cHNa/89ysnodG1eBwzsUpeIAU3LLyghxFVJ8heijCiLhfh3V2uJ\nv/aUaXi1a09kZgyfHFpHYk4y9bzr8FDYaII9gkrYWsWRm2/mP9tOaYlfr9OxYmpvvNydyzkyIURx\nJPkLUUbOPPaINu1SqzYebduyLepHtkRuRynFwHr9GNZoEE76yvHfMifPzNQ39tqVDexcl9HhcqUv\nREVXOb5lhKjECuJiiZ57eXTK4AfH4d6rF8uOvE1kVjQ1XH2Z2uMhQvS1yzHKG7P3t3g+2vqnNh/i\n507zen6S+IWoJCT5C+FApuQku8Rf67HJeHfuwpSdM7EpGwadgee7TqNhiGN7tvunjPlm4lONLF//\nK24uTqRnF2h1r03qSYCvWzlGJ4S4UZL8hXAQZbEQ9fyz2nzTdz5A5+TEU7tfxKZsADzVaRJezp7l\nFWKxLFYbv5xOZtvBGKIS7H+U5BUUvr7n7mrgrWm3VKqHEoUQhST5C+EAymq1u8cfOn0Gm2N+ZFvU\nj1rZsIaDaOBTrzzCK1ZmronpK38qtr5Xm5oM7FxXXtsTopKT5C9EKUv8z8dk7tmlzQeOvJfvXKP4\nMWqPVjas4SBubxhe9sFdQ+TFLBasOWxXdku72tzWtS61Aipe64QQ4uZJ8heiFOX+8btd4ncZfQ9r\n/KI5GxOplb1+y3zcnCrWPfKv9p7nm5+jtPkVU3vj41H5xhAQQlwfSf5ClKILbywDwDk4hLjJd/L5\nqU2Qcbn+rVuXVKh75CkZeTz7zn67sndn9MXZyVBOEQkhyoIkfyFKgVKK6Jde1OYDZj7LJyc+0ubH\ntryPbrU6lUdoxTp0MpF3vj6uzTerW4Pn7u9QoX6cCCEcQ5K/EKXgzKMPadNeEaNYceIjEo1JdK/V\nmTEtRlWohGpTilnvHSApPU8re3VSDwJ93csxKiFEWZLkL8Q/dOGtN7Rpt1F3sdr9N9KNGQyoewt3\nNRlaYRJ/vsnCxt3n+eFInFbm4qxn5ZN9pJlfiGpGkr8Q/0D0wnna6Hz6tmGs8viVnIJcRjS6nYH1\n+1WYxK+U4vFle+zKxg5uTr/2oeUUkRCiPEnyF+ImxSxeqCV+GtRhdbtMTGYzo5vfTe/Q7uUb3N+8\n+83le/tjBjWjX4dQ9BXkh4kQouxJ8hfiBimrlTMTH9bmbb7evN3LCjYY3/oBOga3LcfoitrzWzyH\nTiYBENG/Cf071inniIQQ5U2SvxA36PzT07TpvM6teL9ZKs56AxPbjKOFf8UY2MZitfHhlpMcPJFo\nVz6wS91yikgIUZFI8hfiBpyeMB5shf3y/9jFm2NNU/B08uDx9uMrTFe9aVn5zFi9z67Mz9uVpY/3\nrDDPIAghypckfyGuk/H0KS3xR9d05ljTwlfjpneaRC3PkPIMTROXnMOcDw9p83ff0ohhPRuUX0BC\niApJkr8Q1ylty7cAWHw9+ap/YV/3E9uMqxCJPy0rnx9/iWPrgRit7PXJvfDzdi3HqIQQFZUkfyGu\ng81swnj8GAD/HlB4xd8puB1tg8LKM6yrNvEDrH7qFtxc5L+3EOLq5NtBiBLYCgo4O3miNp/npifY\nI5BxrSLKMSr4/lAMn+84q827OOt5dFgr2jQKwMVZOu0RQhRPkr8Q15Cy8QvS/rdZm//3HQEAvNT9\n2XKJx2K18e8tJznwt6f4lz7eE3+fijVSoBCi4pLkL0QxCmJj7RL/5j4+ZHsZWNTrhXKJ50xcBos/\n/cWu7NaOoTwQ3gy9Xp7iF0JcP0n+QvyNNSeH5C/WkfXTXq3sjfuDAXj9lgW4OZX9Q3QrNvzG7+dS\ntfl7b23C4G4V49VCIUTlI8lfiCsom41z056wK1s9KhCAWV2nl3nitylFZHyWlvj9vF1ZPKG73NMX\nQvwjkvyF+IvNZOLs4xO0+VqTJjMzYz381TFOqFetMotFKcXUN/aSm2+xK39
9cq8yi0EIUXVJ8hfi\nL4kf/1ubrj15KntrpEFmYeKf0WlymcVhsdqY8Nouu7IOTQN5YGCzMotBCFG1SfIXAsiPPE/2oQMA\nWMbcxXPpn0N6YV2bwJY09K1fJnHEJeUw59+Xe+h7YGAzBnSSgXiEEKVLkr+o9qx5ecQsmq/Nr7L9\nrE23D2rNI60fLJM4dh29wH++O6XNPzO6Ay3r+5XJvoUQ1Yskf1GtWfPyODdlkja/8r4gbfqNfi/j\npC+7/yJXJv6V0/rg6eZcZvsWQlQv+vIOQIjyknv8mF3i57Gx2AyF9/hnd3u6zBK/1Wbjidd2aPP/\nntlfEr8QwqHkyl9UO8pmI3rubEzxF7Sy0BnP8Wz8RwDU965LzTIarMemFI++ukubv7NPwzLZrxCi\nepPkL6oVqzGXc1MvP7nvWq8+IVOn8vQvS7Sypzs9XmbxTLziqf5xg5vTt31ome1bCFF9SfIX1Yay\n2ewSf/ADD/JDaC47r0j8/2o1GoPe8R3o/P11vmcf7EyLUB+H71cIIUDu+YtqIvvw/3FmwnhtvtHS\nFeyqW8DOuJ+0sqc7PU6Xmh0cHotSyi7x39a1Ln3kil8IUYbkyl9UeZk/7bHvwGfqdJS3J9t/2QVA\nLc8QXuj6FDpd2QyOs2lvpDa94JFuhAZ6lsl+hRDiEkn+okpTSmmJ3+DlTYPFr2Jwd2f1b5d/DLzY\n7ekyjeno6WQA7uzdUBK/EKJcSPIXVVrWvssd9jRa9gY6vZ4sUzbHU/8E4Il2j5RpPAdOJHAhJReA\n8M51y3TfQghxidzzF1WWKTGRxI8+AKBG/3B0ej0mq5nnf1qgLdPCv2mZxXM8Mo33vjkBgLeHMx5u\n8ttbCFE+JPmLKsmWn0fUC89p84Gj7iWzIJvpu1/Qyl7rM6/M7vNn5pp4fd2v2vwbU/uUyX6FEOJq\nSkz+Fy5cYOrUqTz4YGH/5uvXrycqKsrRcQnxj6R+87U23fiNVeicnJn18+Ur/jndZuDh7F4msRw4\nnsD0lZffKvjguVvLZL9CCFGcEpP/7NmzGTFiBEopABo2bMjs2bMdHpgQN8uWn0/699sAqPX4FAye\nnjy95/I5+3KvFwnxDC6TWD79/hTvfXtCm1/4SDf0ZdTaIIQQxSkx+ZvNZgYMGKA1j3bp0sXhQQlx\ns2wFBZx94jFt3qtde6KzYimwmgCY1PYhfF3LpjOdL3adY8cvhV0Ih/h7sPTxntSWp/uFEBXAdT1x\nlJWVpSX/M2fOUFBQ4NCghLhZVyb+Os/MRGcw8OrhlQDodXpaB7YskzjWbj/ND0fitPnFE7qXyX6F\nEOJ6lJj8J0+ezL333ktycjLDhw8nPT2d1157rSxiE+KGpG37H/x1eyp0+gw8mrcgNS9dq3+tz1yH\nx6CU4u2vjnH4VOG7/HWCPJn/cDeH71cIIW5Eicm/VatWfPXVV5w+fRoXFxcaNmxIUlJSWcQmxHXL\nj44i5Yv1AHh16IRnWGtyzUbm7F8MgA4dbk5uDo3BZlM88upObb5ZHV9mjunk0H0KIcTNuOY9f5vN\nxuTJk3F1daV169Y0a9YMnU7H44+X3ahnQpREKUXMgrkAGLy9qT15CgBbI38oLNMZeO2WuQ6P49X/\nHtWmB3Wpy3MPdHT4PoUQ4mYUe+W/efNmVq5cSXR0NC1btkSn06GUQq/X07t377KMUYhiWY1Gzk29\n/GO00dIVACTmJrH7wj4C3QN4sdvTOOsd26HOV3vPczo2A4Axg5rRv2Mdh+5PCCH+iWK/EYcNG8aw\nYcNYuXIlU6ZMsavLzs6+ro2//PLL/Pbbb+h0OmbNmkXbtm21uh9++IG3334bFxcXhg4dypgxY0pc\nR4i/u/juam261sTH0RkKh+PdeHYLNmXjriZDHZ74N+05z7f7orR5SfxCiIquxG/FKVOmcPbsWdLT\nCx+cMplMLFy4kK1bt15zvUOHDhEdHc2
6des4d+4cs2bNYt26dUDh7YQFCxawadMmatSowaOPPkp4\neDgxMTHFriPE3xlP/Ynx+DGg8Ml+j+YtAPgz7QzHUk/StEYj2gWGOTSGX04n2yX+f8/s79D9CSFE\naSgx+S9atIiffvqJlJQU6tWrR2xsLOPHjy9pNfbv3094eDgAjRs3JjMzk5ycHLy8vEhPT8fHxwd/\nf38Aunfvzr59+4iNjS12HSGulB8VSdxrr2jzlxK/1WblyzPfokPHyKbDHdp975FTSazadEybl8Qv\nhKgsSkz+v//+O1u3buXBBx/kk08+4dixY2zfvr3EDaekpBAWdvmqy9/fn+TkZLy8vPD39yc3N5eo\nqChCQ0M5ePAgXbt2veY61xIU5F1iPOKfqUjH2FpQwIGF87T5nps2oNMXPru6/exe4nMTuLVhTzo2\nauGwGP6MTrNL/BuXDMfZ6Z8NlVGRjnFVJsfZ8eQYV3wlJn8XFxegsKc/pRStW7dmyZIlN7yjS90D\nA+h0Ol555RVmzZqFt7c3depc/R7pletcS3Ly9T2DIG5OUJB3hTrGKZu+1KabrHqXlNTCIXLzLHn8\n9/evcTG4MLB2f4fG/Mybe7Xp957pR0Z67j/aXkU7xlWVHGfHk2PseKXx46rE5N+wYUM+++wzOnfu\nzEMPPUTDhg2v64G/4OBgUlJStPmkpCSCgoK0+a5du7J27VoAXn/9dUJDQykoKLjmOkIoi4W0Ld8C\nEDT6AfSurlrdd1E7yTHnMrzRYId24btlf5Q2veyJXjgZZHBMIUTlUuK31rx58xg6dChPPfUUI0eO\npH79+rzzzjslbrhXr1589913ABw/fpzg4GC75vtHHnmE1NRUjEYjO3fupEePHiWuI6o3c0oyZx57\nRJuv0ffy6HgpeansjN2Ln2sN+td13HC5Wbkmvtx9HoDwznWo4eVawhpCCFHxlHjl//LLL/PCC4Vj\noA8fPvy6N9yxY0fCwsKIiIhAp9Px0ksvsXHjRry9vRk4cCD33nsv48ePR6fTMWHCBPz9/fH39y+y\njhAAlox0Imc+o83XnjwFndPl03fT2f9hUVbubDIEF4OzQ2LIzDXZDc17V59GDtmPEEI4WonJ32Aw\nsH//fjp27Iiz8+UvVb2+5KbOGTNm2M23aHH5AaxBgwYxaNCgEtcRAuD8jOnadINFS3AJCdHmz6Sf\n49fkP2joU59Owe0csn+rzWaX+J+6tx3uro7tP0AIIRylxG+vDRs2sGbNGm1eKYVOp+PkyZMODUyI\nS9J/uPx2SeM3V2HwuDwsrk3Z+PLsZgDuaea4V/ve//aENr1iam98PFwcsh8hhCgLJSb/I0eOlEUc\nQhQr+fPPAPDt288u8QMcTPiF2OwLdAnpSAOfeg7Zv1KKQycLB7MaHd5UEr8QotKTdktRoZkSErTp\n4AfG2tXlWwr45txWnPXOjGg82CH7zzKamPbm5eb+gZ3rOmQ/QghRliT5iwrLZjYR9eJMAAy+NbSO\nfC7ZHrOLLFM2tzcIx8+tRqnuOzPXxGv/PUp8yuX39x8a4rhOg4QQoixJ8hcV1tlJE7Tpei/av/mR\nlp/OjzG78XXxYWD9fqW6X5PZavdwH8DSx3vi7+NWqvsRQojyUmLyz8zM5J133iE5OZmlS5eyY8cO\n2rdvr/XLL4QjGE/9qU3XefZ5nP387Oq/PrcVs83CiMa342oo3Xvwb391udveGRHtadVAznUhRNVS\n4vt6L774IrVq1SIuLg4oHNXvueeec3hgonq7NGiPk58/Hs2a29VFZkZzOPFX6nnXoUvNDqW73+Qc\nfjuXCsBT97WTxC+EqJJKTP5paWmMHTtWe8d/8ODB5OfnOzwwUX0l/udjbbrh4lft6pRSfHGmsHvf\nkU2Ho9eVXte6FquNOR8e0uZbNwwotW0LIURFcl33/M1ms/b+dEpKCkaj0aFBierLnJ5O5p5dANQI\nH2j
Xix/A4cRficqKoUNwW5rUaFhq+z0Vk86StUe1+eVP9Cq1bQshREVTYvJ/4IEHuOeee0hOTuax\nxx7jjz/+0Lr7FaI02UwmIp+53JNf0L2j7epNVhNfn9uKk96JOxsPKdV9v3dFJz6PDGuJr/TZL4So\nwkpM/rfffjsdO3bk6NGjuLi4MH/+fIKDg8siNlHNZO7aoU03Wrq8yKt9P8bsJb0gg0H1byXQvfTu\nxf96NoX07AIA3p3RD2cnGaVPCFG1lZj8+/bty7Bhw7jjjjvs+uYXorRl/rQHKOzMx6mG/dP9GQWZ\nfB+9A28XLwbVv/Vqq98Um1K8+cXvAPh6ukjiF0JUCyV+061fv56goCBmz57NiBEj+PDDD0lMTCyL\n2EQ1kvnTXkzx8QB4d+tepP7bc99hspkZ3ug23J1K7337t778Q5teOrlnqW1XCCEqshKTf82aNXno\noYfYsGEDq1atIi4ujvDw8LKITVQjaZu/0aYNHh52dTFZcRxMOEKoVy161OpSavs05pv59WwKAE/c\n3QbDdYxUKYQQVcF1Pe1/+vRpvvvuO77//ntq1KjBnDlzHB2XqEbyY6IxpyQD0OStd+zrLAUsOfwm\nAHc0Glyqr/bN/ej/tMH5tdwAACAASURBVOmOzYJKbbtCCFHRlZj8Bw8ejLu7O8OGDeODDz4g5Ipx\n1IUoDcnrP9em9W72Tfpbo37Qppv7NSm1fWblmkjJLOyvYkZE+1LbrhBCVAYlJv+33nqLJk1K70tX\niL/L+/MkAA0WvmJX/nvycX6I2Q3A/c1H4mxwLrV9/ue7U9q09OInhKhuik3+06ZNY8WKFTz88MNa\nBz9Q2MOaTqdj165dZRGfqOIy9+7Wpl1q1rSre/ePNdp015odS22fZouNX04X3maYcnebUtuuEEJU\nFsUm/xdffBGAtWvXFqnLy8tzXESi2rBkZJC45iMAfHr3savLNV/uRXJFv5dx1pfeAJT7jycAoAM6\nyL1+IUQ1VOzTU4GBgQDMmTOH0NBQu38ysI8oDVn792nTwREP2NU9u3cuAEHuAaWa+G1K8fHWwhED\nR90qt7OEENVTsd+q33zzDatWrSI+Pp5+/fpp5WazWfthIMTNsublkfLlegAC777H7kG/zee/16Yf\nb/dwqe73wF9X/QADOtUp1W0LIURlUWzyv+OOOxg6dCgvvPACU6ZM0cr1er107yv+sXNTJmnTvn0v\n99hnsVm0J/xvbxBOsEfp/dA8eyGTDzYXPlzYp20t6c1PCFFtFZv8T5w4QatWrRgxYgQxMTF2dVFR\nUfTo0cPhwYmqyZRwUZuuO/MFDJ6e2vziQyu06SENS6czKZtNcSIqjWXrf9PKxt0uXVULIaqvYpP/\nV199RatWrVi9enWROp1OJ8lf3LSYlxcC4Fq3Lu5NmtrVJRiTAHih61Ol1qHP7A8PcjH18gOEK6f1\nQX/FGyxCCPH/7d13XFXlH8Dxz73AZQgOBBwMJw5AcFtqbi1nmSO1TIufK3GlJqFJ7jTLzLTUzJbm\nKCs1t6U5cCOCSioqKooCTkDG5d7fHzcPkgMu3IuM7/v16tVzx/Oc5zwc/HLOec73KW6eGPyDgoIA\n+OGHH7K8r9PpUEsaVJFL2rt30SUnAVDhnRFZPvvqxHKlXNE+62N/ubV802kl8Deo6czbnWpja226\nCYRCCFEYZRvF161bx4oVK8jIyKBv3760bdv2sY//CZETDy/bq3HOnDtyX5tCeLzhfryvk7dJtqXX\n69lzwnCLoU7VsgzvXkcCvxBCkIPgv3r1anr16sX27dvx9PRk586dbN68OT/6JooYvVZLwvrfgEfP\n+hccX6qUh/gOMMn2xn+Z+SjhmN5+JmlTCCGKgmyDv7W1NRqNht27d9OxY0e55C9yLfFE5oQ7+7r1\nlHK6Tkv03csABDYaZZJt/bbnPDfvpgIwUCb3CSFEFjmK5FOmTOHYs
WM0btyY0NBQ0tLSzN0vUQTd\n/8eQXMex68uoHvojcvSuIKXs7uCa5+3o9XrW77sIGB7pa+FXMc9tCiFEUZJt8J87dy6VKlXiq6++\nwsLCgpiYGKZMmZIffRNFTFLYcQBsKlVW3jtzK0opT2z8rkm2c+9+ulJ+q1Ntk7QphBBFSbazn1xc\nXPDx8WHXrl3s3r0bPz8/atWSy6jCOGk3bpAeb1hMx66WISCnZaQxP3QxAOXsnE02w//LXyMMbTra\nmaQ9IYQoarI9858/fz5z5szhxo0bXL9+nenTp7N48eL86JsoInQp97kY9J7yWm1jg16vJ2jfDOW9\n9xuPMcm2Eu6k8M/l2wD0bFnNJG0KIURRk+2Z/8GDB1m1apUy0U+r1fLGG28wZMgQs3dOFA3nRg5X\nytUWfEmKNpWxf3+gvDe2wXCTLN6j0+uzzPCvX0PWoBBCiMfJ9sz/v0l9LC0tUUl2NJFD96POgU4H\ngMfkKVjY2rLmzG/K512rvkTVUpVMsq2vfj+plL8Y3UKOUyGEeIJsT7d8fHwYOnQoTZs2BWD//v3U\nqVPH7B0TRUP8r78AoLYrgY2HIcgfjD0KwDDft/BxMt2EvCORhtTA/+tSGzsbSeYjhBBPku2/kEFB\nQWzevJmwsDBUKhXdunWjY8eO+dE3UcjpdTruRxqy9rmPDwTgRnK88rlX2Zom29bDS/U29algsnaF\nEKIoyjb4q9VqPD09UalUqFQqatasKZdTRY6cH5/56J61uzsAUw7MUd4z1cI9J6LiWbLhlEnaEkKI\n4iDb4D979mx27txJnTp10Ol0fPLJJ3Tp0oXRo0fnR/9EIZUaE0PGHcOs+4rDRwKG/P0PzG4ebJLt\npGsz+GztCeX11++1Nkm7QghRlOVotv8ff/yBlZUVAGlpafTp00eCv3iquFUrlLJ9vfoAnL55BoCS\nGgfsNSVMsp0hc3cr5SXjW6FWy1UpIYTITrbXXZ2cnLC0zPwbwcrKClfXvKdgFUVb8mnDZfgqcz4B\nDCl3d13eB0CP6l1Mso2oq3eU8vi+9bC0kHUnhBAiJ7I98y9Tpgw9evTgueeeQ6/Xc/jwYdzd3Zk/\nfz4Ao0aZZiEWUXSkXbuqlC3LOAJw7EYYUXcu4OvkTcPy9Z5U1Sgzvjc8NVDKXkPtSmVM0qYQQhQH\n2QZ/d3d33P+drAXQqlUrc/ZHFAFxP68BQPVvTojUjDTWnfsDS5UFr5rorP9i7F2lHDywkUnaFEKI\n4iLb4B8QEJAf/RBFhF6nUxbwcRtrSOm7PfovbqfeoUOl1jjblTXJdg6eug6Ad+UylLa3NkmbQghR\nXMhNUmEyer2es4PfVl7betYg4f5NdlzaTSlNSV6s1MYk27l5N4Wthy4D0KqezD8RQghjSfAXJnPv\n0EGl7B5kyN2/7twfpOu0vFK9EzaWpjlDH7coM39/vRrOJmlTCCGKkxwF/1u3bhEeHg4Ycv0L8V96\nnY7YpV8BULr9i9hWrcY/N89xPC6cqqUq0aicaSb5XbiWea//04BmqCXhlBBCGC3b4L9x40Zee+01\n3n//fQCmTZvG2rVrzd4xUbhc+SQzc59T9x5k6DL4+ex6VKjo5fmyybJCTvvuCACOJa3lXr8QQuRS\ntsF/+fLl/P7775QpY3iUasKECaxZs8bsHROFx/UV33P/n0gAyg30R63RsOfqAa4mxfJ8hUZ4lHQz\nyXau3EhUyqN6+pmkTSGEKI6yDf4ODg7Y2toqr21sbJRsf0LEr/uZO3/9CYCmQkVKNX+BQ7HHWHvm\nd2wtbehW7SWTbEen0zP5m0PKa3cXe5O0K4QQxVGOkvz8+uuvpKamcvLkSTZt2oSjo2N+9E0UcCkX\nznNz00YASjZ7gfJv+QPw3alVAHTwaI2DxjRBeuuhS0r581EvmKRNIYQorrI9858yZQrh4eEkJSUx\nadIkUlNTmT59en70TRRwl2ZMV
crlBhoe8TudcEZ5r0Nl0y2ys37/RQD6d6iBva1ceRJCiLzI9sy/\nZMmSTJ48OVeNz5w5k7CwMFQqFUFBQfj6+iqfrVixgvXr16NWq/Hx8WHixImsW7eO+fPn4+HhAUDT\npk0ZNmxYrrYtzEuv1ytlz6++Vib07bz8NwDl7VxMtq0N+y+SmpYBQLM6FUzWrhBCFFfZBv+WLVs+\ndqb2rl27nlrv0KFDREdHs3r1aqKioggKCmL16tUAJCYmsmzZMrZt24alpSVvv/02x48bssJ16tSJ\nCRMm5GJXRH7Sp6YqZdW/Cz8lpiUpK/f5+7xhku0cjrzBr3+fB6C0vQaNlYVJ2hVCiOIs2+C/cuVK\npZyenk5ISAipD/3D/yQhISG0a9cOgGrVqnHnzh0SExOxt7fHysoKKysrkpOTsbOz4/79+5QqVSoP\nuyHyW0r0RQDsfDKv5uy4lLm8bkX78ibZzvdbIpXypwHNTdKmEEIUd9kG//8u31u5cmX8/f0ZOHDg\nU+vFx8fj7e2tvHZ0dCQuLg57e3usra0ZPnw47dq1w9rams6dO1OlShVCQ0M5dOgQ/v7+aLVaJkyY\ngJeXV7Y74ezskO13RN78d4yPzzLkeihVyU35bPufuwCY3Gq0SX4ml6/fIylFC8CGT17Oc3sFnRzH\n+UPG2fxkjAu+bIN/SEhIltexsbFcunTpCd9+sofvEScmJrJ48WK2bNmCvb09AwYMIDIyEj8/Pxwd\nHWnVqhWhoaFMmDCBDRs2ZNt2XNw9o/sjcs7Z2eGRMU6+cgUATeOmxMXdI0OXoXzmoqpgkp/Jewv2\nKOWi/jN+3BgL05NxNj8ZY/MzxR9X2Qb/RYsWKWWVSoW9vT1TpkzJtmEXFxfi4+OV1zdu3MDZ2ZCH\nPSoqCnd3d+WRwYYNGxIREUHPnj2pVq0aAPXq1ePmzZtkZGRgYSH3eQuSlEvRyj1/TYWKAGy+uBMA\nFSqTZPNLSdOSeD8dgI+GPp/n9oQQQmTKNvgHBgZmuXyfU82aNWPBggX06dOHkydP4uLigr294Zlv\nV1dXoqKiSElJwcbGhoiICFq2bMnSpUupUKECXbp04cyZMzg6OkrgL4ASjxlS7Fo6OaFSq0lMT2Lz\nxR0A1HfxfVrVHPtk1XGl7FLa9infFEIIYaxsg//s2bP5/vvvjW64fv36eHt706dPH1QqFcHBwaxb\ntw4HBwfat2+Pv78/b775JhYWFtSrV4+GDRvi5ubG+PHjWbVqFVqtlhkzZuRqp4T56FJSuLnRcCvG\n6eXuAEzYk3klqGeNbnnextX4JKKuGhbwCXy9fp7bE0IIkZVK//DN+McIDAwkJiYGPz+/LGl9R40a\nZfbO5ZTcXzKvh+/hXf3yCxKPGs78q3/xFZ+dXE7UnQsABD83Hhe7vC+x+/ZHfyrlbwLb5Lm9wkDu\nk+YPGWfzkzE2v3y55+/m5oabm2kWZhGFW2rMFSXwVxg8DK2VWgn8vWq8bJLAv+PIZaW8cEyLPLcn\nhBDiUU8M/uvXr6dbt24EBATkZ39EAZWRnER08CTDCwsLHBo3YdrBT5TPW7k1M8l2Vu44a2ivbkVs\nrbP921QIIUQuPDG3/88//5yf/RAFXMy8zEBfff5C7qbdIzbpOgD/8+lvkm2cjr6llF/vUMMkbQoh\nhHhUtgv7CAGGFfwAKn04DbWNDe/vnaZ8Vs+ljkm2sX6v4RaCm3MJLNRyaAohhLk88bpqaGgorVq1\neuR9vV6PSqXKNre/KDqSLlxUytZu7iw/mZnyObDRaJNsQ6fX88/l2wCM7uVnkjaFEEI83hODv5eX\nF59++ml+9kUUUMdHjwVA4+pGekY6R64bnsFv6dYUd4eKJtnG5z+fUMqOJW1M0qYQQojHe2Lw12g0\nj+T1F8XPrR3blLL7+EBG7p6ovO5d4xWTbOPhR/tea1PdJG0KIYR4sifeWPX1NU2mNlG4xa0yXOI
v\n4VeXE8kXlPc/fM40yy7/uO0fpVypvAMvNvYwSbtCCCGe7InBf/z48fnZD1EA3fpzh1J2HTGafVcP\nAlClZCWc7crmuf0bt+/z57EYABrUcCZ4YKM8tymEECJ7MqVaPNHt7YZL/mWbNQXg9M0zALzp1dsk\n7Qd+lbli5PBXTfPEgBBCiOxJ8BePpc/IID3uBgDVA4Zx4U608pkpMvn9HXZVKQe90SDP7QkhhMg5\nCf7ise7u26uULe3s2HLRMCmvrI2jSdo/dNqQIMinqiPV3UqZpE0hhBA5I8FfPNa9f5ftdeppuMQf\nkXAagLe8++W57QydjlMXDdn8/DvVznN7QgghjCPBXzxWckQ4ACWbNWf7uT3K+x4OeX/8c9GvEUq5\nlL11ntsTQghhHAn+4hH6jAylbOlQkqVHDY/7udpXwEJtkbe29XpCz8YD0LetZ57aEkIIkTsS/MUj\nzg7xV8on4k4q5fdNkMp374lrSrl9I/c8tyeEEMJ4EvxFFnqdTim7vz+JxeHfAVDHyQuVSpXn9pdv\njgTgpSaSzEcIIZ4VCf4ii9ToiwBYOjlhXbWq8v6QOgPy3PaVG4lKuWeranluTwghRO5I8BdZ3Nlr\nmNxX8rmm7Lt6CABrS2uTnPVP/sbQXnXXUqhN0J4QQojckeAvFOk3E7iz+y8ANOXKs/H8VgA612hj\n0u34d5HH+4QQ4lmS4C8UN1b+qJRLPt+UxPQkAHp6dcpz23eT0wCw0VhQroxdntsTQgiRexL8hSL5\npOH5e/cJQRy5fhwAGwsbLC2euPJzjv317wI+TqVs8tyWEEKIvJHgLwDQa7Xo09MBsKpaleUnDc/2\ne5etaZL2f99rWA64qU8Fk7QnhBAi9yT4CwDuHshcYW9XzD6lPNC7b57b/mDZQaXcobE82y+EEM+a\nBH+BXqfj+rfLACj7cnd+PfcHAK9W74JalbdDZOuhS8TEGeYO9GhZVWb5CyFEASDBXxC7bIlS3uiR\n+Sx+S7emeWo34kICq/88B4BLaVs6P185T+0JIYQwDQn+gnsHDwDg9GpPDiWEAdDTsxuW6txP9EtK\nSefT1WHK64+GPp+3TgohhDAZCf7FnF6vV8qhnpkz8Vu7N89TuyM+y1wJ8Mt3W+apLSGEEKYlwb+Y\nS7t6VSnHam8D0NajRZ7a/OfSLaU89e3GWGvythKgEEII05LgX8zFrTI80mdXvwGhceGU0pTklWq5\nT+qTodMxe2UoALU8SuPmYm+SfgohhDAdCf7FXPJpw5K9N5v7cF97n4bl6uZ6hr9Or2fQnF3K6+4t\nqj75y0IIIZ4ZCf7FmF6rVcqH1IYMfA3L1811e3vCMm8hDH3ZG0+30rnvnBBCCLOR4F+MXZ4zCwB1\niRJExJ+inJ0z7vauuW7vuy3/ANC2gRuNa5czSR+FEEKYngT/Ykqv15NyPgqAu52ak67T0rBc3Vwv\n3fv3Q2f93V+oYpI+CiGEMA8J/sVUYugxpby/nCEDX8Ny9XLVVlp6Bt9ujgTAu3IZ7Gys8t5BIYQQ\nZiPBv5iKXfoVADaNG/HPrXNUKumOi51Trtr6KzRGKY/u7WeS/gkhhDAfCf7F1IMV/C608UKn19Eo\nl2f9gJLCt0vTylio5ZASQoiCTv6lLoYykgyX+VGpOHz7JCpU1HfxzVVbV+Iy1wJo18DNFN0TQghh\nZhL8i6G7Bw3L91pW9uDC3UvULFOdUtYljW5Hr9czedkhADSWakqW0Ji0n0IIIcxDgn8xlHLOcJk+\n1tMZgIblc3fJ/8zl20p5/sgX8t4xIYQQ+UKCfzF075BhFb+9TolYqi2p6+ydq3Z+3H4GgNqVykj+\nfiGEKEQk+BczutRUpRzNTXzK1sbW0tbodm7cSiYmzjB34I0ONUzWPyGEEOYnwb+YiVuzKsvrRuWM\nT+er0+sJXHxAeV2hbIk890sIIUT+keBfzNzZ/RcAfzU0rLb
nXbaW0W18+M1hpfzFaLnXL4QQhY0E\n/2JEr9cr5Yjqhkv9VhbGZePT6/XK43192npKNj8hhCiEJPgXI+k3ritlnVqFl2NNo9tYv++iUu7Q\nyN0U3RJCCJHPJPgXI0kR4QD8U8kagBZuzxtVX5uh4/e9FwDDyn1CCCEKJwn+xUjSiTAAUjRq6rv4\nUsfJy6j6mw9EK+W+bT1N2jchhBD5R4J/MaFLSSH5ZAQAR73sGOjV1+g2th66DMCQbt6o1blb+lcI\nIcSzJ8G/mEgKP6GU75WwwEJtXFKem3dTSE7VAlCnalmT9k0IIUT+sjRn4zNnziQsLAyVSkVQUBC+\nvpmLx6xYsYL169ejVqvx8fFh4sSJpKenExgYyNWrV7GwsGDWrFm4u8ukMlOI/WYpAMdq2WJtYXwO\n/r0nrillOxuzHjZCCCHMzGxn/ocOHSI6OprVq1czY8YMZsyYoXyWmJjIsmXLWLFiBT/99BNRUVEc\nP36cjRs3UrJkSX766SeGDh3KJ598Yq7uFSvXf/xeWcL3sFcJOlRqY3Qbfx67AsA7r/iYtG9CCCHy\nn9mCf0hICO3atQOgWrVq3Llzh8REw/PhVlZWWFlZkZycjFar5f79+5QqVYqQkBDat28PQNOmTTl2\n7Ji5ulds6PV67uz6E4AYZytSbNS0dTcuMc+Fq3e4m2z448GvulzyF0KIws5swT8+Pp4yZcoorx0d\nHYmLiwPA2tqa4cOH065dO1q3bo2fnx9VqlQhPj4eR0dHQ8fUalQqFWlpaebqYrGQGp05Q//n9oaf\nh7GJfX7bHaWUrSxlAR8hhCjs8u3m7cPZ5RITE1m8eDFbtmzB3t6eAQMGEBkZ+dQ6T+Ps7GCyfhY1\nMfsMz+VfcTEE/C412ho9Xn8eMczyX/J+O5ydJI+/uchxnD9knM1PxrjgM1vwd3FxIT4+Xnl948YN\nnJ0N68dHRUXh7u6unOU3bNiQiIgIXFxciIuLo1atWqSnp6PX69Fosp+cFhd3zzw7UQQkRJ4FIKKa\nIZ3v887P5Xq8LHQZMtZm4uzsIGObD2SczU/G2PxM8ceV2S77N2vWjK1btwJw8uRJXFxcsLc3LCbj\n6upKVFQUKSkpAERERFC5cmWaNWvGli1bAPjrr79o0qSJubpXbNwL2Q/ADUdLmlZoTGnrUkbV3/Dv\nlYOSJTSoVPJsvxBCFAVmO/OvX78+3t7e9OnTB5VKRXBwMOvWrcPBwYH27dvj7+/Pm2++iYWFBfXq\n1aNhw4ZkZGSwf/9++vbti0aj4aOPPjJX94qFtOuxSvlWSQtauDU1uo2NIYY5A5LOVwghig6z3vMf\nN25clte1amUuH9unTx/69OmT5fMHz/YL07i9czsAsWUtsVRb4u5Q0aj691O1pGt1AHRtWtnU3RNC\nCPGMSIa/Iiw58jQA+/3saVrR+FsokdG3ANBYyQx/IYQoSiT4F2FpV68CcNXJCu+yxi/fe+qiIfh3\nblbFpP0SQgjxbEnwL6Iefkwyw1KFj1Nto9vY+W9Wvybe5U3WLyGEEM+eBP8i6u7+fQBcrKChc5X2\neWrLq4qjKbokhBCigJDgX0RdX/41AKkaFR0rtzO6/tINpwCw0VjII35CCFHESPAvghKPhyrl0y95\n5yp4h5w0PCbYqq6ryfolhBCiYJC1WYugmzGGxDxhnra8Ubu30fWTU9KVcu821U3WLyGE6S1YMI9/\n/jnNzZsJpKSkULGiKyVLlmLmzI/Nvu309HSGDfOnatVqODk54+LiwqBBb+W53aNHD7N8+VIuXYpm\n/fqt3Lt3j48+msrt27fJyMjA0bEsEycGU6KEvQn2ItPff+9i//49BAZ+8NjPlyxZhIuLC6+80tOk\n230WJPgXQSm/rgfgnntZKtobP1kv4LM9AJQqkX1qZSHEszVixBgANm3awPnzUQQEjM63bd+4cR29\nXk9QUDBLliwyWbsNGjT
Cz68er77aGYBVq37E17cur732OgDffLOE7du38sorPUy2zeJGgn8RVqfF\ny0bX2XLwklIe0s3blN0RQuSzRYs+5+TJcHS6DHr27Evbtu2ZOvUDypUrzz//nObGjesEB0+nSpVq\nfPjhRG7duklaWhqDBg2jcePn+OmnH9m1aycALVu2oV+//kyd+gE2Njbcu3eP9PQ0Ll++xEcfTcPR\nMXO57wUL5nHqVARarZZevfpSsmRJQkL2MmbMe2zevJE1a1ayfPlKrl+PZdasqXz22dP/cEhMvIda\nnXmX+u23Bz/ynQ0bfiMi4gS3bt3kwoULDBnyDtu2beHSpYt8+OEMatXyeuz+nD37DzNmTKFUqVJU\nrJh5m3Pt2lX8+ec2VCo1rVq1oXfvfnn6WRQ0EvyLmIgdP/PgfL2RWyOj66/96xwAVSo4UKtSmWy+\nLYR42Jo/z3E48oZJ22xUyyVXt9+OHTvCrVs3WbhwKampKfj7v8kLL7QEICMjg08//YJfflnN1q2b\nadOmHcnJSSxcuJS7d+9w+PBBrly5zPbtW1iy5FsA/P3707p1WwBKly7De+9N5MqVy0yd+gGBgR8o\nZ/4hISFcuXKJL79cRnJyMgMG9GX58hUsX74UgPDwMEqWLE1ycjLh4WHUr98w233p0eM13n03gH37\n9tCkyfO0bduB6tU9H/leTMwVFixYzG+//cKKFd+xbNmPrF//Kzt2bMPe3uGx+7N8+dcMHvwOTZs2\nZ/bs6QBcuXKZvXv/5ssvv0Gn0zFkyFu0bm38xOmCTCb8FTGxh/cCkOBm3AI+AIdOX+dBdoDA1+ub\nsFdCiPwWHh5GeHgYAQGDGTt2JDpdBjdvJgDg51cPAGfnciQlJVKlSjXu3LnDtGmTOX48lNat2/HP\nP5HUqeOLpaUllpaW1KnjS1SUYZVQL68nXxWMiIigbt0GANjZ2eHhUYnY2Guo1WpSU1OJj4+jWbPm\nnD59kvDwMOrVa5Dtvnh4VGLVql8ZMmQ4aWmpjBw5lM2bNz7yvVq1vFCpVJQt60T16jVQq9U4OpYl\nKSnxiftz8eJ5fHx8AZS+nDoVweXL0QQEDGbkyKGkpNzn2rWrRox+wSdn/kWMR9RtAGr0Nn7SzU87\nDb/YZUtaY2UpKX2FMFbvNtULzCRZKysrunXrTr9+bz7ymYVF5u+3Xq/H1taWpUu/Izw8jD/+WE9I\nyD4aNWqSJVlYeno6KpXhfNHS0uqJ21WpVFnqabXpqNUq6tTx5ciRQ9jbO+DlVYejRw9x7txZRox4\nN0v9W7duUaZMGfR6vdLP1NQUrK1taNLkeZo0eZ7nn2/Ojz9+S8eOXZ64X//dx//268H+6PV65ZaC\nTqdT9q9ZsxaMHTshS/sHDux/4n4XNnLmX4Tcv3RRKZf1qmtU3bT0DO4kpgEw7X+ylLIQhZ2Xlw/7\n9u1Bp9ORkpLCZ5/NfeJ3IyNPsXPnNvz86jF+fBDnz5+jZs1aREScQKvVotVqiYw8hadnjWy3W6dO\nHUJDjwCQlJTItWtXcXV1o27dBqxZ8xNeXt7UqFGT8PAwSpQogaVl5jloYmIi/v5vkJKSwsWL5/Hw\nqAzAiBFDOXbsiPK9uLgbWe7P58ST9sfDoxKRkYa8Jg+2UatWbY4ePURqago6nY7PPvuYtLQ0o7ZX\n0MmZfxFybdPvAPxT2Ybsf0Wz+n3vBaVso5HDQojCrm7d+vj4+DJkyFuAnh49XnvidytWdGXx4oX8\n9tsvqFQqXn99AK6ubnTs2JURI4ag1+t55ZWeuLiUy3a7TZo0YcuWHQwfPgitVsvw4aOxtrbB17cu\n778/lmHDAtBor6B0ygAAHQlJREFUNCQmJtKsWYssde3t7XnjjYG8844/VlYaxowZD8CkSR/y6adz\nWLZsMWq1mlKlSjF2bKBR4/Gk/Rk48H989NF0XFxcKF++IunpaVSs6Er37r0YPnwwKpWKV
q3aoNEU\nraefVPqHr4MUUnFx9551FwqEM/8bCMD+HnUY2HGsUXWnfXeYC9fuMbx7HRrUdM7ymbOzg4yxmckY\n5w8ZZ/OTMTY/Z2eHPLchl/2LiOsrflDKTRp1eco3H5WSpuXCNcMvq09VyeMvhBBFnQT/IuLOX4Zn\nV09VtaFWWeMu+n/1+0mlbG0lE/2EEKKok+BfBOjSMyeiHGnhblQuf22GjhNRhsd/BnasZfK+CSGE\nKHgk+BcBNzcY0vkm2qppUiH7Z2Yf9vBEvxd8K5i0X0IIIQomCf5FwK1dfwKGS/51XeoYVff6rfsA\nvNWxlizdK4QQxYQE/yJAdz8ZgEPeJXB3MO7Z19MXbwJQU1L5CiFEsSEPdBdy+owMVP8+rNmhmnG5\np/V6PUkpWgCcStqYumtCiHxw7dpV3nyzDzVrGubspKWl8frrA2jZsvUz61NsbCw3b8bj5eXD/Pmf\n0KtXH6OT8jwsLu4GPXp0Yfr0ObRo0QowJOSZPDmQypWrApCRoWXo0JH4+dVl2bLFlC5d+qm5DYo7\nCf6F3PUfvwPgnq2ahuWNy+q3/fBlpaxWyyV/IQorD49KfPHFEgDu3r3DW2+9znPPPY+19bP5o/7Y\nscPcv5+Ml5cPo0YZl3PkcXbs2Iabmzs7d25Vgj8YEhlNnz4HMCzqM378KFau/CXP2ysOJPgXYnq9\nnrt7/gbggG8JhpfIPvvWAzqdnlV/Glbwa+ZT3iz9E0Lkv5IlS1G2rBMJCQloNBpmzZr2b359NRMm\nfED58uXp06c7NWrUonHjJlhYWLJu3RosLa2oXr0GY8dOICBgMLVrexMZeYrU1FSmTp1F+fIVWLx4\nISdOHEeny+DVV3vTvv1LxMZeY/r0YHQ6HeXLVyA4eBLffLMES0tLypUrz6pVKxg1ahyBge+ycuUv\nWFtbExp6lLVrVzFp0ofMnDmFe/fukZGRwejR4x+7Wt/27VsYM+Y9PvwwiPv372Nra/vId1xd3UhK\nSiIjIyM/hrnQk+BfiGXcvaOUE+pUMqru6n8DP0D/F2uarE9CFGfrzm0k9Ea4Sdus51KHV6vnPHHX\ntWtXuXv3Di4u5fj445n06fM6jRo1ISRkL9999zUTJkzi6tUYZs6cS9Wq1RgwoA9z5nxGuXLl+eOP\n9aSmpgCGPyIWLFjMzz+vYs2albRs2Ybr12NZuHApaWlpvP32G7Ro0YolSxbRp8/rNG/ekkWL5hMT\nE0PHjl0oXbo0zZu3ZNWqFVhYqGnYsDFHjx6madPm7N27m1at2rJmzU80adKUrl1f4cKF88yfP5fP\nPluUZX8uXbpIUlIijRo1oV69Buzdu5v27V96ZL9PnYrAxaVclgV9xJNJ8C/E7h05DMD5ihq6VX3R\nqLqnog0T/Xq3ro5GEvsIUahdumRYfhZAo9EwadIULC0tiYg4waVL0Xz33TJ0Oh2lSxsm9trY2FK1\najUA2rV7kaCg8bz4YkfatXtRuVXQqFFjAHx8fDlwYD/h4WGcPBmubEev1xEfH8+ZM5HKpf133hmF\ns7MDmzdvf6SPLVu2Yd++v2natDkHDx7A338IH3zwPrdv32Lr1k0Ayh8eD9u+fStt23YAoH37l9i0\naYMS/I8fP6b0p0SJEkyaNMUEo1k8SPAvxOJ+WgFATDkNHZyfvL7248TEJQHQoZG7yfslRHH1avUu\nRp2lm8rD9/wfZmlpxbRps3FycsryvpVV5j/9/fu/Rfv2Hdm1awcjRw5j4UJDOw+Wt32wHK6VlRVd\nurxM//5ZlwtXq9XodNkvEdOwYWMWLZpPVNQ5XF1dsbMrgZWVJWPGjMfHx/eJ9bZv34parWL//r3o\ndBlcvRrDvXuGdOQP3/MXxpFH/Qop/b+/mADHatmiVuX8R3k/VauUZaKfEEWXl5cPe/bsAuDo0cNs\n27Yly+c6nY7Fixfi5OREnz5v4ONTh9jYWADCwo4DE
BERTuXKVbMsEZyamsq8eYagW6uWF8eOGa5C\nfv31V+zfvx+1Wv3IvXeNRkO1ap6sXPk9rVq1Vfr399+G/l24cJ5Vq37MUuf06ZPY2dmxcuUvfPvt\nSr7/fjVt2rRn9+6dphukYkqCfyGlS00FIMlGzbiGI4yq+92WSADKOFibvF9CiILD338we/bsYvjw\nQSxfvhQfn6xJwNRqNXZ2JRgy5C1GjRqGSqXC09OwNsj167G8++4Itm/fQu/efalTx4969RowZMhb\nBAQMombN2v9uYwjr1/9GQMBgrl2LoUmTJvj41GHFiu/Ztm1zlu21bNmGXbt20rx5SwB69nyNmJjL\nvPPO/5g9ezp169bP8v3t27fQuXPXLO917tyNHTu2mXSciiNZ0reQunDoT9KXfM/FCho6THv0ct/T\nvP2RISPgiFfrUK+GczbfliU684OMcf6Qcc6ZgIDBvPvue1StWt3oujLG5idL+hZjYdvXAKCxszeq\n3pnLt5Wyb/WyJu2TEEKIwkEm/BVC2vQ0vC4YZsXWav2yUXU/WxsGQHPfClio5W8/IcSjHjd5UBQt\n8q9/IXR1V+aknXKNX8hxPb1eT0qaYRJO37aPJtIQQghRPEjwL4TubjNMdrnauDoqI87eV24/C0DV\niiWxtZaLPkIIUVxJ8C+ENLcSAUht2cioehdi7wLQqJaLyfskhBCi8JDgX8joUu4b/q+Ceu4NjKp7\n/qoh+Lfwq2jyfgkhhCg8JPgXMon/Jt5I1aiwt7LLcb3klHQALC1UcslfiCJkwYJ5BAQMpl+/Hrz6\namcCAgYTFDT+WXcLgHnz5ihJg3JjyZJF/Pbbz4BhztL774/jt99Ms2rfw21np3v3TqT+m1vFXPJj\nGw+TKFDI3N23F4BTVW3xsch5kp7NBy8BUNO9tFn6JYR4NkaMGAPApk0bOH8+ioCA0c+4R5nGjHnP\nZG0tWbKIihUr8sorPUzWZnEmwb+QSb5+FYCT1WyxUOdsQZ6klHT+CIkGoFHtnC/7K4Qo3BYsmMep\nUxFotVp69epLhw4vMWyYP3Xq+HLq1EnS09OZOnUW5cplLusdGXmKefM+xsrKCmtrG6ZOnUlGRgZT\np35AcnIy9vYOTJkyk/T0dGbNylyO9913J1ClSlVeeuklKlWqStOmzdmw4TcCAz9g69ZNpKamEB19\nkZiYK4wZ8x6NGz/H999/w59/7sDV1ZW0tDTeeOMt/PzqPrIf27dv4fz5c8ya9Yny3ooV37Fnz24y\nMrQ0b96SAQP8iY2NZfLkQDQaDb6+dTl5MoL58xc9djsPW7Toc06eDEeny6Bnz760bdv+seMZG3uN\niRPf4+OPPyM6+iJLl36JpaUl5ctX4L33JhIaepSff15FUlISAQGjmTr1A557rikREeGULl2a2bPn\nkZycxMyZU0hMTPx33HKXTCmvJPgXNgm3AHCt7JXjKgvXZS4x2ty3gsm7JIQwiFu7Sllt01QcGjbC\nuVcfo+sdPXqYK1cu8eWXy0hOTmbAgL688IIhrW7p0o588cUSVq9ewc8/r2b48FFKvY0b19Oz52u0\nb/8Shw8fJCEhgT/+WE/Tpi/w6qu9WLnyB44ePUxk5Cl8fevRt+8bRESE88UX8/jkkwVER0czY8bH\neHhUZsOG35R24+LimDv3c/bt28P69evw9KzJ77+vY+XKX7h37y59+776SFAGiIw8zZ9/7mDlyl9Q\nP/R0k1ptwaJFXwPQq1c3evfux6pVP9Khw0v07NmHBQs+BeDWrVtP3c6xY0e4desmCxcuJTU1BX//\nN3nhhZZoNJos/UhNTWXatMm8//5kypRx5N13R7BgwWIcHBxYsOBTdu/+k5IlS3H+/HlWrvwZlUrF\nlSuX6dLlFUaOHIu/f38uXIjir7920qxZCzp16sq5c2dZuPBzPvnkc6N/vnklwb8Q0d42ZOfTWoCv\ns0+O6pyLuUPkJ
UO9if0boFbJQj5CFAeRkaeoW9cwKdjOzg4Pj0rExFwBMpfr9fb25YcfvslSr0WL\nVnz66Ryioy/Spk17PDwqceZMJO3aGZYN79evPwC//fYz//vfMAB8fOpw6ZLh6qKDgwMeHpUf6c+D\nM3oXFxcSExO5cuUS1at7Ym1tjbW1s7JWwH+dORNJr159WLRoPpMnT1Pe12isGD78f1hYWHL37h3u\n3btLdPQFOnY0rKrYrFkLzp07l+12wsPDCA8PU5YG1ukyuHkzgfLls54ozZkznVat2lK9uidxcTeI\nibnC++8bljK+f/8+Tk4ulCxZCk/PGlhZWaHVanFwcFCWTn6w3+HhYYSE7GPTpg0AaLVangUJ/oXI\n3QP7AbhcTkO9MtVyVGfuqlClXM21lFn6JYQwcO7VJ1dn6eagUql4eOkWrTZdWcUzcwlew3K9D2vc\n+DmWLv2O/fv3MH36ZEaOHItabYFer8vyPUM9Qzt6vR6dzpBAzMrK6rH9sbDIvE2p1xvqPHwm/99+\nPNCtW3defrkH48aN4o8/1tO5czdiYq7wyy9rWLbsR2xtbenXr4fSjwf7+KC97LZjZWVFt27d6dfv\nzcdu/wEXl3Js3ryR7t17YmlpRbly5R/JhHj48MEsyyVbWGQNsXq9YXtjx06gdm3jlmE3NZntX4gk\nnzoJwFkPa1zsnLL5tuGgT0s3/MLOH9ncrH0TQhQstWp5ERp6BICkpESuXbuKq6sbAGFhhpOCiIgT\nVK5cNUu9tWtXkZSUyIsvdqJnzz6cPXuG2rW9OHrU0Na6dWvZtm3zv0v5Gt47cSKMatVqGNW/ihVd\niYo6h1ar5ebNBM6ciXzid1UqFRMnBvPtt8uIjr7I7du3cXQsi62tLadORRAXF0d6ejqurm5ERp4G\n4MC/J0vZbefhpYpTUlL47LO5j+3DkCEBNGnyPN99t4wyZcqg1aZz6dJFANasWcn58+dytN8PL2N8\n/vw51qz5KUf1TE3O/AuRe2cjsQASKmcf+MFwyf8BBzvNU74phChq6tdvyIED+xk+fBBarZbhw0dj\nbW0DwLVrV3n33QCSkpKYMePjLPVcXd0IChqPg4MDGo2GoKAPsbCwYPr0YPbt+xt7e3uCg2eQkZHB\nrFlTGDlyKHq9nrFjA43qn5OTM61atWXw4AFUqlSF2rW9sbB48vmoo2NZRo8eR3BwEIsWfY2FhSXD\nhvlTt259unR5mU8+mc3o0WMJDg5ix45teHkZ2stuO3Xr1sfHx5chQ94C9PTo8doT+/DWW4MYPHgg\nLVq0YsKESUybNhkrKw3Ozi50796LhISEbPe7V6++zJw5hXfe+R96vc6kT0QYQ5b0LST0GRmcHeIP\nQMxkf1p7ZJ/T/8HSvdXdShH0hnEJgR4mS3San4xx/pBxhmHD/AkM/IBKlSqbpX1jxnjTpg106NAR\nlUrFm2++xueff0XZsjk7uXmcqKhz3L+fjI+PL1u2/EFERDjjxgWafDvPmimW9JUz/0Ii4ZQhuc8t\nB4scBf50beb9uTG9/MzWLyGEyK24uBsMGvQmVlYaOnbskueAbGtry9y5M1Gp1KjVaiZO/NAs2ykK\n5My/kPhtxlC8LqRwzNuePmO+yP77e86zft9FAL4JbJOnbcvZkvnJGOcPGWfzkzE2P1Oc+cuEv0Lg\n0r0reF1IAaB50545q3PdsPhP/w7GTcIRQghR9EnwLwQijmxXyq6NW+aozvFz8QD4VC1rlj4JIYQo\nvCT4FwLp+w4aCr5eT3wW9mF3k9KUslMpG3N1SwghRCElwb+Au516h0qxhpWe3LrmbEGL4OWHALCy\nVOfojwUhhBDFi1ln+8+cOZOwsDBUKhVBQUH4+voCcP36dcaNG6d87/Lly4wdO5b09HTmz5+Ph4cH\nAE2bNmXYsGHm7GKBF5t0A9sUw5xM2/8k43gcnV7PnUTDmf+7vWWWvxBCiEeZLfg
fOnSI6OhoVq9e\nTVRUFEFBQaxevRqAcuXK8cMPPwCGvMb9+/enTZs2bN26lU6dOjFhwgRzdavQOX3zDNWtVFinP5qG\n83G++i1CKdf0KGPOrgkhhCikzHbZPyQkhHbt2gFQrVo17ty5Q2Ji4iPf+/XXX3nxxRcpUaKEubpS\nqMUmxmKdrkddyT3b7+r1eo78EwfAiB51zN01IYQQhZTZgn98fDxlymSeeTo6OhIXF/fI99auXUvP\nnpmPrx06dAh/f38GDBjAqVOnzNW9QkGn15FwyZAv2jI1+5Wfdh+/qpTreTqbrV9CCCEKt3zL8Pe4\nXEKhoaFUrVoVe3t7APz8/HB0dKRVq1aEhoYyYcIENmzYkG3bpkh4UFDN9/8C/HP23V4datGrQy2z\n9KMoj3FBIWOcP2SczU/GuOAzW/B3cXEhPj5eeX3jxg2cnbOeje7atYvnn39eeV2tWjWqVTMsVVuv\nXj1u3rxJRkZGlqUghRBCCJE3Zrvs36xZM7Zu3QrAyZMncXFxUc7wHwgPD6dWrcwz1aVLl7Jx40YA\nzpw5g6OjowR+IYQQwsTMduZfv359vL296dOnDyqViuDgYNatW4eDgwPt27cHIC4ujrJlMzPQde3a\nlfHjx7Nq1Sq0Wi0zZswwV/eEEEKIYqtILOwjhBBCiJyTDH9CCCFEMSPBXwghhChm8u1Rv9x4Unpg\ngP379/Ppp59iYWFBixYtGD58eLZ1xOMZO84HDx5k1KhReHp6AlCjRg0++OCDZ9X9QuFpY5yamsrk\nyZM5e/Ys69aty1Ed8Shjx1iO49x52jgfOHCATz/9FLVaTZUqVZgxYwZqtVqOZSMZO8aHDx82/ljW\nF1AHDx7UDx48WK/X6/Xnzp3T9+7dO8vnHTt21F+9elWfkZGh79u3r/7s2bPZ1hGPys04HzhwQD9i\nxIhn0d1CKbsxnjp1qn758uX67t2757iOyCo3YyzHsfGyG+f27dvrr127ptfr9foRI0bod+3aJcey\nkXIzxrk5lgvsZf+npQe+fPkypUqVokKFCqjValq2bElISEiOUwqLTLkZZ2Gc7I7LMWPGKJ/ntI7I\nKjdjLIyX3TivW7eO8uXLA4asrrdu3ZJj2Ui5GePcKLDB/2npgePi4nB0dHzks5ymFBaZcjPOAOfO\nnWPo0KH07duXffv25W+nC5nsjsv/5r/ISR2RVW7GGOQ4NlZOx/nGjRvs27ePli1byrFspNyMMRh/\nLBfoe/4P0+fiicTc1CnucjJmlStXJiAggI4dO3L58mXefPNNtm3bhkajyYceFn5yLJufHMf543Hj\nnJCQwNChQwkODs4SxJ5WRzxZTsY4N8dygT3zf1p64P9+dv36dVxcXHKUUlhklZtxLleuHJ06dUKl\nUuHh4YGTkxPXr1/P974XFrk5LuVYNk5uxkuOY+NlN86JiYkMGjSI0aNH07x58xzVEVnlZoxzcywX\n2OD/tPTAbm5uJCYmcuXKFbRaLX/99RfNmjXLUUphkVVuxnn9+vUsW7YMMNwaSEhIoFy5cs9sHwq6\n3ByXciwbJzfjJcex8bIb548++ogBAwbQokWLHNcRWeVmjHNzLBfoDH9z587lyJEjSnrgU6dOKemB\nDx8+zNy5cwHo0KED/v7+j63z8NoB4vGMHefExETGjRvH3bt3SU9PJyAgQLnvJB7vaWM8cuRIYmNj\nOXv2LD4+PvTu3ZuuXbvKsWwkY8e4devWchznwpPGuXnz5jRq1Ih69eop3+3SpQuvvfaaHMtGMnaM\nO3fubPSxXKCDvxBCCCFMr8Be9hdCCCGEeUjwF0IIIYoZCf5CCCFEMSPBXwghhChmJPgLIYQQxYwE\nfyEKgCtXruDj40P//v2z/Hf69Okn1lmwYAHz5s3Lx14+2ZIlS9i1axcAGzZsQKfTAdC/f38yMjLy\npQ+7d+/m9u3b+bItIQq7QpPeV4iiztHRkR9
++OFZdyNXBg8erJQXLFhAx44dUavV+bo/3377LR9+\n+CGlS5fOt20KUVhJ8BeigIuKiiI4OBgLCwsSExMZPXo0L7zwgvK5Vqtl0qRJXLhwAZVKRe3atQkO\nDiYtLY2pU6cSHR1NUlISXbp04e23387S9rp169i+fTsqlYrr169TtWpVZs6ciZWVFYsWLWLXrl1Y\nWlri6enJpEmTSEtLY+zYsdy9exetVkvr1q0ZNmwYgYGBNGjQgGvXrhEdHc3AgQP54osvaNKkCSEh\nIXTq1Im///4bjUZDSkoKrVq1Ytu2bZw6dYqFCxei1+uxtLRk2rRpuLu7Z+ljmzZtlJzln3/+OfPn\nz1dWlyxfvjwff/wxa9eu5ciRI4wbN45Zs2ah1WqZPXs2Wq2W9PR0Jk+ejJeXl/l/WEIUEhL8hSjg\n4uPjGTVqFI0aNSI0NJRp06ZlCf5nzpwhLCyMzZs3A7BmzRru3bvH6tWrcXFxYfr06WRkZNC7d2+a\nNm36SHa18PBwtm3bhq2tLW+88QZ///03jo6ObNu2jbVr12JlZcXIkSPZuHEj9vb2aLVaVq5ciU6n\n44cfflAu8QOMHDmShQsX8u2332JpafjnpWTJktSvX589e/bQtm1bdu/eTePGjbGysiI4OJjVq1dT\nunRpduzYwZw5c1iwYMEjY1C5cmXGjx+PVqvF1taWlStXolar8ff3Z+/evfTr14+vv/6auXPnUqlS\nJbp27crChQvx8PAgMjKSoKAg1q1bZ44fjxCFkgR/IQqImzdv0r9//yzvzZ8/H2dnZ+bMmcO8efNI\nT09/5L52tWrVKFOmDIMGDaJ169Z07NgRBwcHDh48SGxsLIcPHwYgLS2NS5cuPRL869evj52dHQD1\n6tUjKiqKy5cv06hRI6ysrABo3Lgx4eHhDB8+nM8//5xRo0bRsmVLevXqhVqd/dShrl27snXrVtq2\nbcumTZvo1q0bZ8+eJS4ujhEjRgCQkZGBSqV6bP0H6UwtLS1Rq9X069cPS0tLzp8//8h65gkJCVy4\ncIGJEycq7yUmJqLT6XLUVyGKAwn+QhQQT7rnP3bsWDp37kzPnj05c+YMQ4cOzfK5tbU1K1eu5OTJ\nk/z111/07NmTn376CY1Gw/Dhw3nppZeeut2Hz9wfZPv+bxDW6/WoVCrKli3L77//TmhoKDt37qRH\njx78+uuv2e5bmzZtmD17Nnfu3OH48eN8/PHHnD9/nooVK+ZoXsCDP0KOHj3KL7/8wi+//IKdnR0j\nR4585LsajQYrK6tCO39CiPwgfwYLUcDFx8fj6ekJwKZNm0hLS8vyeXh4OL/++ive3t4EBATg7e3N\nxYsXadCggXIrQKfTMWvWrMfOhg8LC+P+/fvo9XqOHTtGzZo1qVu3LgcPHiQ9PR2AkJAQ/Pz82Lt3\nL7t27aJBgwa899572NnZkZCQkKU9lUqFVqvN8p61tTXPPfcc8+bNo3Xr1mg0GipXrsytW7c4c+YM\nAIcPH2b16tVPHYuEhARcXV2xs7MjJiaG48ePK+PxYLsODg64ubmxe/duAC5cuMAXX3yRo7EWoriQ\nM38hCri3336b9957Dzc3NwYOHMj27dv56KOPKFGiBAAeHh4sXLiQ1atXo9Fo8PDwoH79+vj5+XH2\n7Flee+01MjIyaNWq1WNnwteoUYP333+fK1eu4OnpSfPmzbGwsKBz5868/vrrqNVqvL296dKlC9eu\nXSMwMJCvv/4aCwsLmjdvjqura5b2XnjhBXr06MGXX36Z5f2uXbsyaNAgfvzxRwBsbGz4+OOPmThx\nItbW1gBMnTr1qWPRrFkzvvnmG/r27YunpycjRoxg4cKFNGnShObNmzN06FBmz57N7NmzmT59OkuW\nLEGr1RIYGJjr8ReiKJJV/YQoxtatW8f+/fuVZZuFEMWDXPYXQgghihk58xdCCCGKGTnzF0IIIYoZ\nCf5CCCF
EMSPBXwghhChmJPgLIYQQxYwEfyGEEKKYkeAvhBBCFDP/Bx/o1GuPpFCZAAAAAElFTkSu\nQmCC\n", "text/plain": [ "
class TFRecordWithSentencePiece(tfrecord_input.TFRecordInput):
  """SentencePiece based input preprocessor for TFRecord data.

  Each batch of raw text is encoded with a SentencePiece model and handed to
  the model as the three components of a sparse tensor (`values`, `indices`
  and `dense_shape`) in place of the original `text` feature.
  """

  def __init__(self, spm_path):
    """Loads the SentencePiece model stored at `spm_path`."""
    super().__init__()
    self._sp = spm.SentencePieceProcessor()
    self._sp.Load(spm_path)

  def dense_ids(self, texts):
    """Encodes `texts` and pads the id lists out to max length with 0's.

    Args:
      texts: Iterable of texts accepted by `SentencePieceProcessor.EncodeAsIds`.

    Returns:
      A 2-D int64 array with one row of piece ids per text.
    """
    encoded = [self._sp.EncodeAsIds(text) for text in texts]
    # `pieces` declares tf.int64 as the py_func output type, so the dtype is
    # spelled out here: plain `int` maps to a 32-bit integer on some
    # platforms, which would not match the declared output type.
    return pd.DataFrame(encoded).fillna(0).values.astype('int64')

  def pieces(self, feature_dict, label_dict):
    """Processes a batch of texts into sentence pieces.

    Removes the `text` entry from `feature_dict` and adds `values`, `indices`
    and `dense_shape` entries describing the sparse piece ids.

    Returns:
      The updated `(feature_dict, label_dict)` pair.
    """
    text = feature_dict.pop('text')
    # NOTE(review): dense_to_sparse is called with its default end-of-sequence
    # token, which looks like 0 - the same value used for padding above. A
    # real piece id of 0 would then be dropped too; confirm against the
    # SentencePiece vocabulary in use.
    sparse_ids = tf.contrib.layers.dense_to_sparse(
        tf.py_func(self.dense_ids, [text], tf.int64))
    feature_dict['values'] = sparse_ids.values
    feature_dict['indices'] = sparse_ids.indices
    feature_dict['dense_shape'] = sparse_ids.dense_shape
    return feature_dict, label_dict

  def _input_fn_from_file(self, filepath: str):
    """Returns a batched, prefetched dataset of sentence pieces and labels."""
    filenames_dataset = tf.data.Dataset.list_files(filepath)
    dataset = tf.data.TFRecordDataset(
        filenames_dataset)  # type: tf.data.TFRecordDataset

    # Use parent class parsing to obtain text features, and processed labels.
    parsed_dataset = dataset.map(self._read_tf_example)
    # Tokenization runs per batch so that all rows can be padded to the
    # longest text of that batch.
    return parsed_dataset.batch(self._batch_size).map(
        self.pieces).prefetch(self._num_prefetch)
def prepare_dataset(data):
  """Builds a dataset with one support/query episode per domain.

  Args:
    data: DataFrame with `text`, `domain`, `label` (0 or 1) and
      `support_or_query` ("support" or "query") columns. It is left
      unmodified.

  Returns:
    A `tf.data.Dataset` created with `from_tensor_slices` from a dict with the
    keys `positive_supports`, `negative_supports`, `positive_queries` and
    `negative_queries`; each value holds one row of texts per domain.
  """
  # Fill missing texts on a new frame. The previous version assigned the
  # filled column back onto `data`, silently modifying the caller's dataframe.
  data = data.assign(text=data["text"].fillna(""))

  episodes = {
      "positive_supports": [],
      "negative_supports": [],
      "positive_queries": [],
      "negative_queries": [],
  }
  for domain in data.domain.unique():
    domain_data = data[data["domain"] == domain]
    for polarity, label in (("positive", 1), ("negative", 0)):
      labelled = domain_data[domain_data["label"] == label]
      for role, suffix in (("support", "supports"), ("query", "queries")):
        texts = labelled[labelled["support_or_query"] == role].text
        episodes[polarity + "_" + suffix].append(texts)

  # NOTE(review): np.array only yields a rectangular array when every domain
  # contributes the same number of texts per group - presumably guaranteed by
  # the input files; confirm.
  return tf.data.Dataset.from_tensor_slices(
      {key: np.array(texts) for key, texts in episodes.items()})
def main():
  """Trains the prototypical network, or evaluates it in test mode.

  The model directory comes from --model_dir, or is derived from the current
  time when the flag is empty. In test mode (--test_mode) the latest
  checkpoint under `<model_dir>/save` is restored and accuracy and AUC on
  --test_file are printed. Otherwise the model is trained on --train_file and
  validated on --validation_file after every 32 training steps; the checkpoint
  with the best validation AUC is saved, and training stops early once the
  validation AUC is lower than each of the three previously recorded values.
  """
  if FLAGS.model_dir:
    model_dir = FLAGS.model_dir
  else:
    st = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    model_dir = "gs://conversationai-models/jjtan/transfer_learning/model/" + st
  print("Model dir: " + model_dir)

  save_path = model_dir + "/save/model.ckpt"
  metadata_path = model_dir + "/meta.txt"
  with tf.gfile.Open(metadata_path, "w") as f:
    f.write("Encoding Layers: " + FLAGS.encoding_layers + "\n")

  # TODO(jjtan): Convert to flags.
  output_types = {
      "negative_queries": tf.string,
      "negative_supports": tf.string,
      "positive_queries": tf.string,
      "positive_supports": tf.string
  }
  output_shapes = {
      "negative_queries": tf.TensorShape([tf.Dimension(12)]),
      "negative_supports": tf.TensorShape([tf.Dimension(8)]),
      "positive_queries": tf.TensorShape([tf.Dimension(12)]),
      "positive_supports": tf.TensorShape([tf.Dimension(8)])
  }

  with tf.variable_scope("encoder"):
    encoding_units = [int(units) for units in FLAGS.encoding_layers.split(",")]
    handle, negative_logits, positive_logits = encoder(
        encoding_units, output_types, output_shapes)

  if FLAGS.test_mode:
    print("In TEST mode.")
    with tf.gfile.Open(FLAGS.test_file, "r") as f:
      test_df = pd.read_csv(f)
    print("Test Dataframe Shape: " + str(test_df.shape))
    test_ds = prepare_dataset(test_df).shuffle(64)

    # Test specific model components.
    with tf.variable_scope("test_predictions_and_metrics"):
      _, acc_op, auc_op, update_acc_op, update_auc_op = predictions_and_metrics(
          negative_logits, positive_logits)

    saver = tf.train.Saver()
    test_itr = test_ds.make_one_shot_iterator()

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.tables_initializer())
      sess.run(tf.initializers.local_variables())

      checkpoint = tf.train.latest_checkpoint(model_dir + "/save")
      saver.restore(sess, checkpoint)

      test_itr_handle = sess.run(test_itr.string_handle())
      # Accumulate the metrics over every test episode; the one-shot iterator
      # signals the end of the data with OutOfRangeError.
      while True:
        try:
          _, _ = sess.run([update_acc_op, update_auc_op],
                          feed_dict={handle: test_itr_handle})
        except tf.errors.OutOfRangeError:
          break
      test_acc, test_auc = sess.run([acc_op, auc_op])
      print("TEST ACCURACY: " + str(test_acc))
      print("TEST AUC: " + str(test_auc))
  else:
    print("In TRAINING mode.")
    with tf.gfile.Open(FLAGS.train_file, "r") as f:
      train_df = pd.read_csv(f)
    print("Train Dataframe Shape: " + str(train_df.shape))
    train_dataset = prepare_dataset(train_df).shuffle(128).repeat()

    with tf.gfile.Open(FLAGS.validation_file, "r") as f:
      validation_df = pd.read_csv(f)
    print("Validation Dataframe Shape: " + str(validation_df.shape))
    validation_dataset = prepare_dataset(validation_df).shuffle(64)

    # Training specific model components.
    with tf.variable_scope("training_operations"):
      train_op, loss_op = train_operation(negative_logits, positive_logits)
    with tf.variable_scope("train_predictions_and_metrics"):
      _, train_acc_op, train_auc_op, train_update_acc_op, train_update_auc_op = predictions_and_metrics(
          negative_logits, positive_logits)
    with tf.variable_scope("validation_predictions_and_metrics"):
      _, val_acc_op, val_auc_op, val_update_acc_op, val_update_auc_op = predictions_and_metrics(
          negative_logits, positive_logits)

    saver = tf.train.Saver()

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.tables_initializer())
      sess.run(tf.initializers.local_variables())

      train_writer = tf.summary.FileWriter(model_dir + "/train", sess.graph)
      validation_writer = tf.summary.FileWriter(model_dir + "/validation",
                                                sess.graph)

      training_iterator = train_dataset.make_one_shot_iterator()
      validation_iterator = validation_dataset.make_initializable_iterator()
      training_handle = sess.run(training_iterator.string_handle())
      validation_handle = sess.run(validation_iterator.string_handle())

      best_auc = 0
      # The window of recent validation AUCs must outlive a single iteration.
      # It used to be re-created inside the loop, so it was always empty when
      # the early-stopping check ran and training never stopped early.
      recent_aucs = collections.deque([], 3)
      for batch_num in range(500):
        print("Batch: " + str(batch_num))
        batch_size = 32
        for i in range(batch_size):
          _, loss, train_acc, train_auc = sess.run(
              [train_op, loss_op, train_update_acc_op, train_update_auc_op],
              feed_dict={handle: training_handle})
          training_summary = tf.Summary(value=[
              tf.Summary.Value(tag="loss", simple_value=loss),
              tf.Summary.Value(tag="accuracy", simple_value=train_acc),
              tf.Summary.Value(tag="auc", simple_value=train_auc),
          ])
          train_writer.add_summary(training_summary,
                                   batch_num * batch_size + i)
          train_writer.flush()

        # NOTE(review): the validation metric variables are initialized once
        # above and not reset here, so val_acc / val_auc appear to accumulate
        # over all validation rounds so far - confirm this is intended.
        sess.run(validation_iterator.initializer)
        for _ in range(32):
          _, _ = sess.run([val_update_acc_op, val_update_auc_op],
                          feed_dict={handle: validation_handle})
        val_acc, val_auc = sess.run([val_acc_op, val_auc_op])

        # Save best version
        if val_auc > best_auc:
          best_auc = val_auc
          saver.save(sess, save_path, global_step=(batch_num + 1) * batch_size)

        # Early stopping
        if len(recent_aucs) >= 3 and all(
            val_auc < prev_auc for prev_auc in recent_aucs):
          break
        recent_aucs.append(val_auc)

        validation_summary = tf.Summary(value=[
            tf.Summary.Value(tag="accuracy", simple_value=val_acc),
            tf.Summary.Value(tag="auc", simple_value=val_auc),
        ])
        validation_writer.add_summary(validation_summary.SerializeToString(),
                                      (batch_num + 1) * batch_size)
        validation_writer.flush()
class TFWordLabelEmbeddingModel(base_model.BaseModel):
  """Binary classifier that attends over words using learned class embeddings.

  Two class embeddings are learned next to the classifier. The similarity
  between each word embedding and the class embeddings yields attention
  weights over the sequence; the attention-weighted sum of the word embeddings
  is fed through dense layers to produce a single logit.
  """

  def __init__(self, target_label) -> None:
    """Creates the model.

    Args:
      target_label: A one-element sequence holding the name of the label to
        predict (the previous `str` annotation did not match this usage).
    """
    # Only single feature supported.
    assert len(target_label) == 1, 'Exactly one target label is supported.'
    self._target_label = target_label[0]

  @staticmethod
  def hparams():
    """Returns the hyperparameters built from the command line flags."""
    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]
    hparams = tf.contrib.training.HParams(
        learning_rate=FLAGS.learning_rate,
        embedding_size=FLAGS.embedding_size,
        dense_units=dense_units)
    return hparams

  def estimator(self, model_dir):
    """Returns an Estimator for this model that uses `model_dir`."""
    estimator = tf.estimator.Estimator(
        model_fn=self._model_fn,
        params=self.hparams(),
        config=tf.estimator.RunConfig(model_dir=model_dir))
    return estimator

  def _model_fn(self, features, labels, mode, params, config):
    """Estimator model function; see `tf.estimator.Estimator` for the args."""
    word_emb_seq = features[base_model.TOKENS_FEATURE_KEY]

    # The Estimator passes labels=None in PREDICT mode. Indexing the labels
    # unconditionally made prediction fail before any op was built.
    if labels is not None:
      labels = labels[self._target_label]

    # Class emb
    class_emb_initializer = tf.random_normal_initializer(
        mean=0.0, stddev=1.0, dtype=tf.float32)
    class_embs = tf.get_variable(
        'class_embs', [2, params.embedding_size],
        initializer=class_emb_initializer)

    # Both sides are L2-normalized before the dot product.
    word_emb_seq_norm = tf.nn.l2_normalize(word_emb_seq, axis=-1)
    class_embs_norm = tf.nn.l2_normalize(class_embs, axis=-1)
    cosine_distance = tf.contrib.keras.backend.dot(
        word_emb_seq_norm, tf.transpose(class_embs_norm))

    # Convolve the per-class scores with a [5, 1] kernel.
    cosine_distance = tf.expand_dims(cosine_distance, axis=-1)
    cosine_distance = tf.contrib.layers.conv2d(
        cosine_distance,
        num_outputs=1,
        kernel_size=[5, 1],
        padding='SAME',
        activation_fn=tf.nn.relu)
    cosine_distance = tf.squeeze(cosine_distance, axis=-1)

    # Attention: softmax over the best class score of every word.
    max_cosine_distance = tf.reduce_max(cosine_distance, axis=-1)
    attention = tf.nn.softmax(max_cosine_distance, axis=-1)
    attention = tf.expand_dims(attention, axis=-1)
    weighted_word_emb = tf.reduce_sum(word_emb_seq * attention, axis=1)

    # The same dense stack (f2) scores the document and both class embeddings.
    f2 = []
    for num_units in params.dense_units:
      f2.append(tf.layers.Dense(units=num_units, activation=tf.nn.relu))
    f2.append(tf.layers.Dense(units=1, activation=None))

    logits = weighted_word_emb
    for layer in f2:
      logits = layer(logits)

    # Regularizers: the class embeddings themselves are pushed to be
    # classified as class 0 and class 1 respectively.
    class_zero_logits = tf.expand_dims(class_embs[0, :], 0)
    for layer in f2:
      class_zero_logits = layer(class_zero_logits)
    class_zero_reg = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=[[0.0]], logits=class_zero_logits)

    class_one_logits = tf.expand_dims(class_embs[1, :], 0)
    for layer in f2:
      class_one_logits = layer(class_one_logits)
    class_one_reg = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=[[1.0]], logits=class_one_logits)

    # Without labels (PREDICT mode) there is no loss to build.
    loss = None
    if labels is not None:
      loss = tf.nn.sigmoid_cross_entropy_with_logits(
          labels=labels, logits=logits) + class_zero_reg + class_one_reg

    # The head's own loss computation is bypassed in favor of the loss above.
    head = tf.contrib.estimator.binary_classification_head(
        name=self._target_label, loss_fn=lambda labels, logits: loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    return head.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)
--config="tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml" \ -- \ --train_path=$train_path \ --validate_path=$valid_path \ --embeddings_path="${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt" \ --embedding_size=300 \ --model_dir="${JOB_DIR}/model_dir" \ --is_embedding_trainable=False \ --train_steps=$train_steps \ --eval_period=$eval_period \ --eval_steps=$eval_steps \ --labels=$labels \ --label_dtypes=$label_dtypes \ --preprocess_in_tf=False echo "Model dir:" echo ${JOB_DIR}/model_dir ================================================ FILE: experiments/tf_trainer/tf_word_label_embedding/run.local.sh ================================================ #!/bin/bash source "tf_trainer/common/dataset_config.sh" python -m tf_trainer.tf_word_label_embedding.run \ --train_path=$train_path \ --validate_path=$valid_path \ --model_dir="tf_word_label_embedding_local_model_dir" \ --labels=$labels \ --label_dtypes=$label_dtypes ================================================ FILE: experiments/tf_trainer/tf_word_label_embedding/run.ml_engine.sh ================================================ #!/bin/bash # This script runs one training job on Cloud MLE. # Note: # We currently use 2 different embeddings: # - glove.6B/glove.6B.300d.txt # - google-news/GoogleNews-vectors-negative300.txt # Glove assumes all words are lowercased, while Google-news handles different casing. # As there is currently no tf operation that performs lowercasing, we have the following # requirements: # - For google news: Run preprocess_in_tf=True (no lowercasing). # - For glove.6B, Run preprocess_in_tf=False (will force lowercasing). 
def main(argv):
  """Trains and evaluates the word label embedding model."""
  del argv  # unused

  text_prep = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)

  # nltk.word_tokenize relies on the 'punkt' tokenizer data.
  nltk.download('punkt')
  tokenizing_fn = text_prep.train_preprocess_fn(nltk.word_tokenize)
  records = tfrecord_input.TFRecordInputWithTokenizer(
      train_preprocess_fn=tokenizing_fn, max_seq_len=5000)

  # Wrap the bare model so that it receives embedded tokens.
  embedded_model = text_prep.add_embedding_to_model(
      tf_word_label_embedding.TFWordLabelEmbeddingModel(records.labels()),
      base_model.TOKENS_FEATURE_KEY)

  model_trainer.ModelTrainer(records, embedded_model).train_with_eval()
def convert_tfrecord_for_bert(filenames,
                              input_data_path,
                              output_data_path,
                              bert_tfhub_url,
                              text_key,
                              label_key,
                              max_seq_length):
  """Converts input TFRecords into the format expected by the BERT model.

  Args:
    filenames: Names of the TFRecord files to convert.
    input_data_path: Prefix prepended verbatim to each filename to read it.
    output_data_path: Prefix prepended verbatim to each filename to write it.
    bert_tfhub_url: TF Hub URL of the BERT module that provides the tokenizer.
    text_key: tf.Example key of the text (bytes) feature.
    label_key: tf.Example key of the label (float) feature; the value is
      rounded to the nearest integer class.
    max_seq_length: Maximum sequence length of a tokenized comment.
  """
  tokenizer = create_tokenizer_from_hub_module(bert_tfhub_url)
  # The label set does not change between records, so build it once.
  label_list = [0, 1]

  for filename in filenames:
    print('Working on {}...'.format(filename))
    in_filepath = '{}{}'.format(input_data_path, filename)
    #TODO: Check if file exists, if not write new file
    #TODO: Have the filename reflect the max_sequence_length and path reflect model
    out_filepath = '{}{}'.format(output_data_path, filename)
    record_iterator = tf.python_io.tf_record_iterator(path=in_filepath)

    # The context manager closes the output file even when a record fails to
    # parse or convert; the explicit close() used before was skipped then.
    with tf.python_io.TFRecordWriter(out_filepath) as writer:
      for ex_index, string_record in enumerate(record_iterator):
        example = tf.train.Example()
        example.ParseFromString(string_record)
        text = example.features.feature[text_key].bytes_list.value[0]
        label = example.features.feature[label_key].float_list.value[0]
        label = round(label)
        ex = run_classifier.InputExample(
            guid=None,  # Globally unique ID for bookkeeping
            text_a=text,
            text_b=None,
            label=label)
        feature = run_classifier.convert_single_example(
            ex_index, ex, label_list, max_seq_length, tokenizer)

        features = collections.OrderedDict()
        features["input_ids"] = create_int_feature(feature.input_ids)
        features["input_mask"] = create_int_feature(feature.input_mask)
        features["segment_ids"] = create_int_feature(feature.segment_ids)
        features["label_ids"] = create_int_feature([feature.label_id])
        features["is_real_example"] = create_int_feature(
            [int(feature.is_real_example)])
        tf_example = tf.train.Example(
            features=tf.train.Features(feature=features))
        writer.write(tf_example.SerializeToString())
    print('... Done!')
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """A function to convert csvs to TFRecords.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import pandas as pd import tensorflow as tf FLAGS = tf.app.flags.FLAGS tf.app.flags.DEFINE_string("input_csv_path", None, "Path to the input csv file.") tf.app.flags.DEFINE_string("output_tfrecord_path", None, "Path where the output TFRecord should be written.") tf.app.flags.DEFINE_string("column_list", None, "Comma seperated list of column names.") tf.app.flags.DEFINE_string("dtype_list", None, "Comma seperated list of column dtypes. 
def convert_csv_to_tfrecord(input_csv_path, output_tfrecord_path,
                            column_names, column_dtypes):
  """Writes the selected columns of a CSV file as tf.Examples in a TFRecord.

  Args:
    input_csv_path: Path of the CSV file to read.
    output_tfrecord_path: Path of the TFRecord file to write.
    column_names: Names of the columns to copy into each example.
    column_dtypes: One dtype per column name; each is one of 'bytes', 'str',
      'float' or 'int'.

  Raises:
    ValueError: If a dtype is not one of the supported names.
  """
  # Read through a context manager so that the input handle is closed; it was
  # previously opened inline and never closed.
  with tf.gfile.Open(input_csv_path) as csv_file:
    df = pd.read_csv(csv_file)

  with tf.python_io.TFRecordWriter(output_tfrecord_path) as writer:
    for _, row in df.iterrows():
      example = tf.train.Example()
      for col_name, dtype in zip(column_names, column_dtypes):
        col_val = row[col_name]
        feature = example.features.feature[col_name]
        if dtype == 'bytes':
          feature.bytes_list.value.append(col_val)
        elif dtype == 'str':
          # Any value is stringified first, then stored as UTF-8 bytes.
          feature.bytes_list.value.append(
              str(col_val).encode("utf-8", errors="replace"))
        elif dtype == 'float':
          feature.float_list.value.append(col_val)
        elif dtype == 'int':
          feature.int64_list.value.append(col_val)
        else:
          raise ValueError('dtype must be one of bytes, str, float, int.')
      writer.write(example.SerializeToString())
class FieldsCounter():
  """Counts how many times each field name has been seen."""

  def __init__(self):
    # Maps a field name to the number of inc_field calls made for it.
    self.counters = {}

  def inc_field(self, field_name: str):
    """Adds one to the count of `field_name`, starting from zero."""
    self.counters[field_name] = self.counters.get(field_name, 0) + 1
just the fields matching --text_fields_re and --label_fields_re. If there is no matching field in the row for --text_fields_re then raise MisingAllTextFieldsError. """ text_field_matcher = re.compile(FLAGS.text_fields_re) label_field_matcher = re.compile(FLAGS.label_fields_re) has_text_field = False output_row = {} for key, value in row.items(): if text_field_matcher.match(key): has_text_field = True counters.inc_field(key) output_row[key] = value elif label_field_matcher.match(key): counters.inc_field(key) output_row[key] = value if not has_text_field: raise MisingAllTextFieldsError( f'Error parsing file {FLAGS.input_jsonlines_path} at line: {line}.\n' f'No field matched by --text_field_regexp="{FLAGS.text_fields_re}":\n' f' {json.dumps(row, sort_keys=True, indent=2)}') return output_row def itr_as_dict(input_jsonlines_path): with tf.gfile.Open(input_jsonlines_path) as f: counters = FieldsCounter() line = 0 for row in jsonlines.Reader(f): line += 1 yield make_selected_output_row(row, line, counters) logging.info(f'Complete.\nField Counts:\n' f'{json.dumps(counters.counters, sort_keys=True, indent=2)}') def itr_as_tfrecord(input_jsonlines_path): for row in itr_as_dict(input_jsonlines_path): example = tf.train.Example() for key, value in row.items(): if isinstance(value, str): example.features.feature[key].bytes_list.value.append( value.encode('utf-8', errors='replace')) elif isinstance(value, float) or isinstance(value, int): example.features.feature[key].float_list.value.append(value) yield example def convert_to_tfrecord(input_jsonlines_path, output_tfrecord_path): with tf.python_io.TFRecordWriter(output_tfrecord_path) as writer: for example in itr_as_tfrecord(input_jsonlines_path): writer.write(example.SerializeToString()) def main(argv): del argv # unused convert_to_tfrecord(FLAGS.input_jsonlines_path, FLAGS.output_tfrecord_path) if __name__ == '__main__': app.run(main) ================================================ FILE: 
hierarchical_attention_research/han_model/.gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject ================================================ FILE: hierarchical_attention_research/han_model/HAN_model.py ================================================ import tensorflow as tf import tensorflow.contrib.layers as layers import numpy as np import data_util from model_components import task_specific_attention, bidirectional_rnn class HANClassifierModel(): """ Implementation of document classification model described in `Hierarchical Attention Networks for Document Classification (Yang et al., 2016)` (https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf) """ def __init__(self, vocab_size, embedding_size, classes, fw_word_cell, bw_word_cell, fw_sentence_cell, bw_sentence_cell, word_output_size, sentence_output_size, max_grad_norm, dropout_keep_proba, is_training=None, learning_rate=1e-4, device='/cpu:0', scope=None): self.vocab_size = vocab_size 
self.embedding_size = embedding_size self.classes = classes self.fw_word_cell = fw_word_cell self.bw_word_cell = bw_word_cell self.word_output_size = word_output_size self.fw_sentence_cell = fw_sentence_cell self.bw_sentence_cell = bw_sentence_cell self.sentence_output_size = sentence_output_size self.max_grad_norm = max_grad_norm self.dropout_keep_proba = dropout_keep_proba with tf.variable_scope(scope or 'tcm') as scope: self.global_step = tf.Variable(0, name='global_step', trainable=False) if is_training is not None: self.is_training = is_training else: self.is_training = tf.placeholder(dtype=tf.bool, name='is_training') self.sample_weights = tf.placeholder( shape=(None,), dtype=tf.float32, name='sample_weights') # [document x sentence x word] self.inputs = tf.placeholder( shape=(None, None, None), dtype=tf.int32, name='inputs') # [document x sentence] self.word_lengths = tf.placeholder( shape=(None, None), dtype=tf.int32, name='word_lengths') # [document] self.sentence_lengths = tf.placeholder( shape=(None,), dtype=tf.int32, name='sentence_lengths') # [document] self.labels = tf.placeholder(shape=(None,), dtype=tf.int32, name='labels') (self.document_size, self.sentence_size, self.word_size) = tf.unstack( tf.shape(self.inputs)) self._init_embedding(scope) # embeddings cannot be placed on GPU with tf.device(device): self._init_body(scope) with tf.variable_scope('train'): self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=self.labels, logits=self.logits) self.loss = tf.reduce_mean( tf.multiply(self.cross_entropy, self.sample_weights)) tf.summary.scalar('loss', self.loss) self.accuracy = tf.reduce_mean( tf.cast(tf.nn.in_top_k(self.logits, self.labels, 1), tf.float32)) tf.summary.scalar('accuracy', self.accuracy) tvars = tf.trainable_variables() grads, global_norm = tf.clip_by_global_norm( tf.gradients(self.loss, tvars), self.max_grad_norm) tf.summary.scalar('global_grad_norm', global_norm) opt = tf.train.AdamOptimizer(learning_rate) 
self.train_op = opt.apply_gradients( zip(grads, tvars), name='train_op', global_step=self.global_step) self.summary_op = tf.summary.merge_all() def _init_embedding(self, scope): with tf.variable_scope(scope): with tf.variable_scope('embedding') as scope: self.embedding_matrix = tf.get_variable( name='embedding_matrix', shape=[self.vocab_size, self.embedding_size], initializer=layers.xavier_initializer(), dtype=tf.float32) self.inputs_embedded = tf.nn.embedding_lookup(self.embedding_matrix, self.inputs) def _init_body(self, scope): with tf.variable_scope(scope): word_level_inputs = tf.reshape(self.inputs_embedded, [ self.document_size * self.sentence_size, self.word_size, self.embedding_size ]) word_level_lengths = tf.reshape(self.word_lengths, [self.document_size * self.sentence_size]) with tf.variable_scope('word') as scope: word_encoder_output, _ = bidirectional_rnn( self.fw_word_cell, self.bw_word_cell, word_level_inputs, word_level_lengths, scope=scope) with tf.variable_scope('attention') as scope: word_level_output = task_specific_attention( word_encoder_output, self.word_output_size, scope=scope) with tf.variable_scope('dropout'): word_level_output = layers.dropout( word_level_output, keep_prob=self.dropout_keep_proba, is_training=self.is_training, ) # sentence_level sentence_inputs = tf.reshape( word_level_output, [self.document_size, self.sentence_size, self.word_output_size]) with tf.variable_scope('sentence') as scope: sentence_encoder_output, _ = bidirectional_rnn( self.fw_sentence_cell, self.bw_sentence_cell, sentence_inputs, self.sentence_lengths, scope=scope) with tf.variable_scope('attention') as scope: sentence_level_output = task_specific_attention( sentence_encoder_output, self.sentence_output_size, scope=scope) with tf.variable_scope('dropout'): sentence_level_output = layers.dropout( sentence_level_output, keep_prob=self.dropout_keep_proba, is_training=self.is_training, ) with tf.variable_scope('classifier'): self.logits = 
layers.fully_connected( sentence_level_output, self.classes, activation_fn=None) self.prediction = tf.argmax(self.logits, axis=-1) def get_feed_data(self, x, y=None, class_weights=None, is_training=True): x_m, doc_sizes, sent_sizes = data_util.batch(x) fd = { self.inputs: x_m, self.sentence_lengths: doc_sizes, self.word_lengths: sent_sizes, } if y is not None: fd[self.labels] = y if class_weights is not None: fd[self.sample_weights] = [class_weights[yy] for yy in y] else: fd[self.sample_weights] = np.ones(shape=[len(x_m)], dtype=np.float32) fd[self.is_training] = is_training return fd if __name__ == '__main__': try: from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple, GRUCell except ImportError: LSTMCell = tf.nn.rnn_cell.LSTMCell LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple GRUCell = tf.nn.rnn_cell.GRUCell tf.reset_default_graph() with tf.Session() as session: model = HANClassifierModel( vocab_size=10, embedding_size=5, classes=2, fw_word_cell=GRUCell(10), bw_word_cell=GRUCell(10), fw_sentence_cell=GRUCell(10), bw_sentence_cell=GRUCell(10), word_output_size=10, sentence_output_size=10, max_grad_norm=5.0, dropout_keep_proba=0.5, ) session.run(tf.global_variables_initializer()) fd = { model.is_training: False, model.inputs: [[[5, 4, 1, 0], [3, 3, 6, 7], [6, 7, 0, 0]], [[2, 2, 1, 0], [3, 3, 6, 7], [0, 0, 0, 0]]], model.word_lengths: [ [3, 4, 2], [3, 4, 0], ], model.sentence_lengths: [3, 2], model.labels: [0, 1], } print(session.run(model.logits, fd)) session.run(model.train_op, fd) ================================================ FILE: hierarchical_attention_research/han_model/LICENSE ================================================ MIT License Copyright (c) 2017 Matvey Ezhov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: hierarchical_attention_research/han_model/README.md ================================================ # Deep Text Classifier Implementation of document classification model described in [Hierarchical Attention Networks for Document Classification (Yang et al., 2016)](https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf). ## How to run 1. Create a virtual environment, activate it, and install requirements: ``` python3 -m venv env source env/bin/activate pip install -r requirements.txt ``` 2. Download the English model for spaCy: ``` python -m spacy download en ``` 3. Get [Yelp review dataset](https://www.yelp.com/dataset_challenge) and extract it in this directory. ``` python3 yelp_prepare.py dataset/review.json python3 worker.py --mode=train --device=/gpu:0 --batch-size=30 ``` ## Results I am getting 65% accuracy on a dev set (16% of data) after 3 epochs. Results reported in the paper are 71% on Yelp'15. No systemic hyperparameter optimization was performed. 
================================================ FILE: hierarchical_attention_research/han_model/bn_lstm.py ================================================ # borrowed from https://github.com/OlavHN/bnlstm, updated for r1.0 import math import numpy as np import tensorflow as tf try: from tensorflow.contrib.rnn import RNNCell except ImportError: RNNCell = tf.nn.rnn_cell.RNNCel class LSTMCell(RNNCell): """Vanilla LSTM implemented with same initializations as BN-LSTM""" def __init__(self, num_units): self.num_units = num_units @property def state_size(self): return (self.num_units, self.num_units) @property def output_size(self): return self.num_units def __call__(self, x, state, scope=None): with tf.variable_scope(scope or type(self).__name__): c, h = state # Keep W_xh and W_hh separate here as well to reuse initialization methods x_size = x.get_shape().as_list()[1] W_xh = tf.get_variable( 'W_xh', [x_size, 4 * self.num_units], initializer=orthogonal_initializer()) W_hh = tf.get_variable( 'W_hh', [self.num_units, 4 * self.num_units], initializer=bn_lstm_identity_initializer(0.95)) bias = tf.get_variable('bias', [4 * self.num_units]) # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias # improve speed by concat. 
concat = tf.concat([x, h], 1) W_both = tf.concat([W_xh, W_hh], 0) hidden = tf.matmul(concat, W_both) + bias i, j, f, o = tf.split(hidden, 4, axis=1) new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) new_h = tf.tanh(new_c) * tf.sigmoid(o) return new_h, (new_c, new_h) class BNLSTMCell(RNNCell): """Batch normalized LSTM as described in http://arxiv.org/abs/1603.09025""" def __init__(self, num_units, training): self.num_units = num_units self.training = training @property def state_size(self): return (self.num_units, self.num_units) @property def output_size(self): return self.num_units def __call__(self, x, state, scope=None): with tf.variable_scope(scope or 'bn_lstm'): c, h = state x_size = x.get_shape().as_list()[1] W_xh = tf.get_variable( 'W_xh', [x_size, 4 * self.num_units], initializer=orthogonal_initializer()) W_hh = tf.get_variable( 'W_hh', [self.num_units, 4 * self.num_units], initializer=bn_lstm_identity_initializer(0.95)) bias = tf.get_variable('bias', [4 * self.num_units]) xh = tf.matmul(x, W_xh) hh = tf.matmul(h, W_hh) bn_xh = batch_norm(xh, 'xh', self.training) bn_hh = batch_norm(hh, 'hh', self.training) hidden = bn_xh + bn_hh + bias i, j, f, o = tf.split(hidden, 4, axis=1) new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) bn_new_c = batch_norm(new_c, 'c', self.training) new_h = tf.tanh(bn_new_c) * tf.sigmoid(o) return new_h, (new_c, new_h) def orthogonal(shape): flat_shape = (shape[0], np.prod(shape[1:])) a = np.random.normal(0.0, 1.0, flat_shape) u, _, v = np.linalg.svd(a, full_matrices=False) q = u if u.shape == flat_shape else v return q.reshape(shape) def bn_lstm_identity_initializer(scale): def _initializer(shape, dtype=tf.float32, partition_info=None): """Ugly cause LSTM params calculated in one matrix multiply""" size = shape[0] # gate (j) is identity t = np.zeros(shape) t[:, size:size * 2] = np.identity(size) * scale t[:, :size] = orthogonal([size, size]) t[:, size * 2:size * 3] = orthogonal([size, size]) t[:, size * 3:] = 
orthogonal([size, size]) return tf.constant(t, dtype=dtype) return _initializer def orthogonal_initializer(): def _initializer(shape, dtype=tf.float32, partition_info=None): return tf.constant(orthogonal(shape), dtype) return _initializer def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999): """Assume 2d [batch, values] tensor""" with tf.variable_scope(name_scope): size = x.get_shape().as_list()[1] scale = tf.get_variable( 'scale', [size], initializer=tf.constant_initializer(0.1)) offset = tf.get_variable('offset', [size]) pop_mean = tf.get_variable( 'pop_mean', [size], initializer=tf.zeros_initializer(), trainable=False) pop_var = tf.get_variable( 'pop_var', [size], initializer=tf.ones_initializer(), trainable=False) batch_mean, batch_var = tf.nn.moments(x, [0]) train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) def batch_statistics(): with tf.control_dependencies([train_mean_op, train_var_op]): return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) def population_statistics(): return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) return tf.cond(training, batch_statistics, population_statistics) ================================================ FILE: hierarchical_attention_research/han_model/bn_lstm_test.py ================================================ import time import uuid import os import numpy as np import tensorflow as tf from tensorflow.python.ops.rnn import dynamic_rnn from bn_lstm import LSTMCell, BNLSTMCell, orthogonal_initializer from tensorflow.examples.tutorials.mnist import input_data batch_size = 100 hidden_size = 100 mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) x = tf.placeholder(tf.float32, [None, 784]) training = tf.placeholder(tf.bool) x_inp = tf.expand_dims(x, -1) lstm = BNLSTMCell(hidden_size, training) #LSTMCell(hidden_size) #c, h initialState = ( 
tf.random_normal([batch_size, hidden_size], stddev=0.1), tf.random_normal([batch_size, hidden_size], stddev=0.1)) outputs, state = dynamic_rnn(lstm, x_inp, initial_state=initialState, dtype=tf.float32) _, final_hidden = state W = tf.get_variable('W', [hidden_size, 10], initializer=orthogonal_initializer()) b = tf.get_variable('b', [10]) y = tf.nn.softmax(tf.matmul(final_hidden, W) + b) y_ = tf.placeholder(tf.float32, [None, 10]) cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) optimizer = tf.train.AdamOptimizer() gvs = optimizer.compute_gradients(cross_entropy) capped_gvs = [(None if grad is None else tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs] train_step = optimizer.apply_gradients(capped_gvs) correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # Summaries tf.summary.scalar("accuracy", accuracy) tf.summary.scalar("xe_loss", cross_entropy) for (grad, var), (capped_grad, _) in zip(gvs, capped_gvs): if grad is not None: tf.summary.histogram('grad/{}'.format(var.name), capped_grad) tf.summary.histogram('capped_fraction/{}'.format(var.name), tf.nn.zero_fraction(grad - capped_grad)) tf.summary.histogram('weight/{}'.format(var.name), var) merged = tf.merge_all_summaries() init = tf.initialize_all_variables() sess = tf.Session() sess.run(init) logdir = 'logs/' + str(uuid.uuid4()) os.makedirs(logdir) print('logging to ' + logdir) writer = tf.summary.trainWriter(logdir, sess.graph) current_time = time.time() print("Using population statistics (training: False) at test time gives worse results than batch statistics") for i in range(100000): batch_xs, batch_ys = mnist.train.next_batch(batch_size) loss, _ = sess.run([cross_entropy, train_step], feed_dict={x: batch_xs, y_: batch_ys, training: True}) step_time = time.time() - current_time current_time = time.time() if i % 100 == 0: batch_xs, batch_ys = mnist.validation.next_batch(batch_size) summary. 
_str = sess.run(merged, feed_dict={x: batch_xs, y_: batch_ys, training: False}) writer.summary.add_str, i) print(loss, step_time) ================================================ FILE: hierarchical_attention_research/han_model/data_util.py ================================================ import numpy as np def batch(inputs): batch_size = len(inputs) document_sizes = np.array([len(doc) for doc in inputs], dtype=np.int32) document_size = document_sizes.max() sentence_sizes_ = [[len(sent) for sent in doc] for doc in inputs] sentence_size = max(map(max, sentence_sizes_)) b = np.zeros( shape=[batch_size, document_size, sentence_size], dtype=np.int32) # == PAD sentence_sizes = np.zeros(shape=[batch_size, document_size], dtype=np.int32) for i, document in enumerate(inputs): for j, sentence in enumerate(document): sentence_sizes[i, j] = sentence_sizes_[i][j] for k, word in enumerate(sentence): b[i, j, k] = word return b, document_sizes, sentence_sizes ================================================ FILE: hierarchical_attention_research/han_model/model_components.py ================================================ import tensorflow as tf import tensorflow.contrib.layers as layers try: from tensorflow.contrib.rnn import LSTMStateTuple except ImportError: LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple def bidirectional_rnn(cell_fw, cell_bw, inputs_embedded, input_lengths, scope=None): """Bidirecional RNN with concatenated outputs and states""" with tf.variable_scope(scope or 'birnn') as scope: ((fw_outputs, bw_outputs), (fw_state, bw_state)) = ( tf.nn.bidirectional_dynamic_rnn( cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs_embedded, sequence_length=input_lengths, dtype=tf.float32, swap_memory=True, scope=scope)) outputs = tf.concat((fw_outputs, bw_outputs), 2) def concatenate_state(fw_state, bw_state): if isinstance(fw_state, LSTMStateTuple): state_c = tf.concat((fw_state.c, bw_state.c), 1, name='bidirectional_concat_c') state_h = tf.concat((fw_state.h, bw_state.h), 1, 
name='bidirectional_concat_h') state = LSTMStateTuple(c=state_c, h=state_h) return state elif isinstance(fw_state, tf.Tensor): state = tf.concat((fw_state, bw_state), 1, name='bidirectional_concat') return state elif (isinstance(fw_state, tuple) and isinstance(bw_state, tuple) and len(fw_state) == len(bw_state)): # multilayer state = tuple( concatenate_state(fw, bw) for fw, bw in zip(fw_state, bw_state)) return state else: raise ValueError('unknown state type: {}'.format((fw_state, bw_state))) state = concatenate_state(fw_state, bw_state) return outputs, state def task_specific_attention(inputs, output_size, initializer=layers.xavier_initializer(), activation_fn=tf.tanh, scope=None): """ Performs task-specific attention reduction, using learned attention context vector (constant within task of interest). Args: inputs: Tensor of shape [batch_size, units, input_size] `input_size` must be static (known) `units` axis will be attended over (reduced from output) `batch_size` will be preserved output_size: Size of output's inner (feature) dimension Returns: outputs: Tensor of shape [batch_size, output_dim]. 
""" assert len( inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None with tf.variable_scope(scope or 'attention') as scope: attention_context_vector = tf.get_variable( name='attention_context_vector', shape=[output_size], initializer=initializer, dtype=tf.float32) input_projection = layers.fully_connected( inputs, output_size, activation_fn=activation_fn, scope=scope) vector_attn = tf.reduce_sum( tf.multiply(input_projection, attention_context_vector), axis=2, keep_dims=True) attention_weights = tf.nn.softmax(vector_attn, dim=1) weighted_projection = tf.multiply(input_projection, attention_weights) outputs = tf.reduce_sum(weighted_projection, axis=1) return outputs ================================================ FILE: hierarchical_attention_research/han_model/requirements.txt ================================================ cymem==1.31.2 cytoolz==0.8.2 dill==0.2.7.1 en-core-web-sm==2.0.0 msgpack-numpy==0.4.1 msgpack-python==0.5.6 murmurhash==0.28.0 numpy==1.22.0 pathlib==1.0.1 plac==0.9.6 preshed==1.0.0 regex==2017.4.5 six==1.11.0 spacy==2.0.11 termcolor==1.1.0 thinc==6.10.2 toolz==0.9.0 tqdm==4.22.0 ujson==5.4.0 wrapt==1.10.11 ================================================ FILE: hierarchical_attention_research/han_model/worker.py ================================================ #!/usr/bin/env python3 import argparse parser = argparse.ArgumentParser() parser.add_argument('--task', default='yelp', choices=['yelp']) parser.add_argument('--mode', default='train', choices=['train', 'eval']) parser.add_argument('--checkpoint-frequency', type=int, default=100) parser.add_argument('--eval-frequency', type=int, default=10000) parser.add_argument('--batch-size', type=int, default=30) parser.add_argument('--device', default='/cpu:0') parser.add_argument('--max-grad-norm', type=float, default=5.0) parser.add_argument('--lr', type=float, default=0.001) args = parser.parse_args() import importlib import os import pickle import random import time from 
collections import Counter, defaultdict import numpy as np import pandas as pd import spacy import tensorflow as tf from tensorflow.contrib.tensorboard.plugins import projector from tqdm import tqdm import ujson from data_util import batch task_name = args.task task = importlib.import_module(task_name) checkpoint_dir = os.path.join(task.train_dir, 'checkpoint') tflog_dir = os.path.join(task.train_dir, 'tflog') checkpoint_name = task_name + '-model' checkpoint_dir = os.path.join(task.train_dir, 'checkpoints') checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name) # @TODO: move calculation into `task file` trainset = task.read_trainset(epochs=1) class_weights = pd.Series(Counter([l for _, l in trainset])) class_weights = 1 / (class_weights / class_weights.mean()) class_weights = class_weights.to_dict() vocab = task.read_vocab() labels = task.read_labels() classes = max(labels.values()) + 1 vocab_size = task.vocab_size labels_rev = {int(v): k for k, v in labels.items()} vocab_rev = {int(v): k for k, v in vocab.items()} NUM_RNN_LAYERS = 5 def HAN_model_1(session, restore_only=False): """Hierarhical Attention Network""" import tensorflow as tf try: from tensorflow.contrib.rnn import GRUCell, MultiRNNCell, DropoutWrapper except ImportError: MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell GRUCell = tf.nn.rnn_cell.GRUCell from bn_lstm import BNLSTMCell from HAN_model import HANClassifierModel is_training = tf.placeholder(dtype=tf.bool, name='is_training') def bn_cell(): return BNLSTMCell(80, is_training) # h-h batchnorm LSTMCell # cell = GRUCell(30) fw_word_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)]) bw_word_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)]) fw_sentence_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)]) bw_sentence_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)]) model = HANClassifierModel( vocab_size=vocab_size, embedding_size=200, classes=classes, fw_word_cell=fw_word_cell, 
bw_word_cell=bw_word_cell, fw_sentence_cell=fw_sentence_cell, bw_sentence_cell=bw_sentence_cell, word_output_size=100, sentence_output_size=100, device=args.device, learning_rate=args.lr, max_grad_norm=args.max_grad_norm, dropout_keep_proba=0.5, is_training=is_training, ) saver = tf.train.Saver(tf.global_variables()) checkpoint = tf.train.get_checkpoint_state(checkpoint_dir) if checkpoint: print('Reading model parameters from %s' % checkpoint.model_checkpoint_path) saver.restore(session, checkpoint.model_checkpoint_path) elif restore_only: raise FileNotFoundError('Cannot restore model') else: print('Created model with fresh parameters') session.run(tf.global_variables_initializer()) # tf.get_default_graph().finalize() return model, saver model_fn = HAN_model_1 def decode(ex): print('text: ' + '\n'.join( [' '.join([vocab_rev.get(wid, '') for wid in sent]) for sent in ex[0]])) print('label: ', labels_rev[ex[1]]) print('data loaded') def batch_iterator(dataset, batch_size, max_epochs): for i in range(max_epochs): xb = [] yb = [] for ex in dataset: x, y = ex xb.append(x) yb.append(y) if len(xb) == batch_size: yield xb, yb xb, yb = [], [] def ev(session, model, dataset): predictions = [] labels = [] examples = [] for x, y in tqdm(batch_iterator(dataset, args.batch_size, 1)): examples.extend(x) labels.extend(y) predictions.extend( session.run(model.prediction, model.get_feed_data(x, is_training=False))) df = pd.DataFrame({ 'predictions': predictions, 'labels': labels, 'examples': examples }) return df def evaluate(dataset): tf.reset_default_graph() config = tf.ConfigProto(allow_soft_placement=True) with tf.Session(config=config) as s: model, _ = model_fn(s, restore_only=True) df = ev(s, model, dataset) print((df['predictions'] == df['labels']).mean()) import IPython IPython.embed() def train(): tf.reset_default_graph() config = tf.ConfigProto(allow_soft_placement=True) with tf.Session(config=config) as s: model, saver = model_fn(s) summary_writer = tf.summary.FileWriter( 
tflog_dir, graph=tf.get_default_graph()) # Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto # pconf = projector.ProjectorConfig() # # You can add multiple embeddings. Here we add only one. # embedding = pconf.embeddings.add() # embedding.tensor_name = m.embedding_matrix.name # # Link this tensor to its metadata file (e.g. labels). # embedding.metadata_path = vocab_tsv # print(embedding.tensor_name) # Saves a configuration file that TensorBoard will read during startup. for i, (x, y) in enumerate( batch_iterator(task.read_trainset(epochs=3), args.batch_size, 300)): fd = model.get_feed_data(x, y, class_weights=class_weights) # import IPython # IPython.embed() t0 = time.clock() step, summaries, loss, accuracy, _ = s.run([ model.global_step, model.summary_op, model.loss, model.accuracy, model.train_op, ], fd) td = time.clock() - t0 summary_writer.add_summary(summaries, global_step=step) # projector.visualize_embeddings(summary_writer, pconf) if step % 1 == 0: print('step %s, loss=%s, accuracy=%s, t=%s, inputs=%s' % (step, loss, accuracy, round(td, 2), fd[model.inputs].shape)) if step != 0 and step % args.checkpoint_frequency == 0: print('checkpoint & graph meta') saver.save(s, checkpoint_path, global_step=step) print('checkpoint done') if step != 0 and step % args.eval_frequency == 0: print('evaluation at step %s' % i) dev_df = ev(s, model, task.read_devset(epochs=1)) print('dev accuracy: %.2f' % (dev_df['predictions'] == dev_df['labels']).mean()) def main(): if args.mode == 'train': train() elif args.mode == 'eval': evaluate(task.read_devset(epochs=1)) if __name__ == '__main__': main() ================================================ FILE: hierarchical_attention_research/han_model/yelp.py ================================================ import os import pickle train_dir = os.path.join(os.path.curdir, 'yelp') data_dir = os.path.join(train_dir, 'data') for dir in [train_dir, data_dir]: if not os.path.exists(dir): os.makedirs(dir) 
trainset_fn = os.path.join(data_dir, 'train.dataset') devset_fn = os.path.join(data_dir, 'dev.dataset') testset_fn = os.path.join(data_dir, 'test.dataset') vocab_fn = os.path.join(data_dir, 'vocab.pickle') reserved_tokens = 5 unknown_id = 2 vocab_size = 50001 def _read_dataset(fn, review_max_sentences=30, sentence_max_length=30, epochs=1): c = 0 while 1: c += 1 if epochs > 0 and c > epochs: return print('epoch %s' % c) with open(fn, 'rb') as f: try: while 1: x, y = pickle.load(f) # clip review to specified max lengths x = x[:review_max_sentences] x = [sent[:sentence_max_length] for sent in x] y -= 1 assert y >= 0 and y <= 4 yield x, y except EOFError: continue def read_trainset(epochs=1): return _read_dataset(trainset_fn, epochs=epochs) def read_devset(epochs=1): return _read_dataset(devset_fn, epochs=epochs) def read_vocab(): with open(vocab_fn, 'rb') as f: return pickle.load(f) def read_labels(): return {i: i for i in range(5)} ================================================ FILE: hierarchical_attention_research/han_model/yelp_prepare.py ================================================ import argparse parser = argparse.ArgumentParser() parser.add_argument('review_path') args = parser.parse_args() import os import ujson as json import spacy import pickle import random from tqdm import tqdm from collections import defaultdict import numpy as np from yelp import * en = spacy.load('en') def read_reviews(): with open(args.review_path, 'rb') as f: for line in f: yield json.loads(line) def build_word_frequency_distribution(): path = os.path.join(data_dir, 'word_freq.pickle') try: with open(path, 'rb') as freq_dist_f: freq_dist_f = pickle.load(freq_dist_f) print('frequency distribution loaded') return freq_dist_f except IOError: pass print('building frequency distribution') freq = defaultdict(int) for i, review in enumerate(read_reviews()): doc = en.tokenizer(review['text']) for token in doc: freq[token.orth_] += 1 if i % 10000 == 0: with open(path, 'wb') as 
freq_dist_f: pickle.dump(freq, freq_dist_f) print('dump at {}'.format(i)) return freq def build_vocabulary(lower=3, n=50000): try: with open(vocab_fn, 'rb') as vocab_file: vocab = pickle.load(vocab_file) print('vocabulary loaded') return vocab except IOError: print('building vocabulary') freq = build_word_frequency_distribution() top_words = list(sorted(freq.items(), key=lambda x: -x[1]))[:n - lower + 1] vocab = {} i = lower for w, freq in top_words: vocab[w] = i i += 1 with open(vocab_fn, 'wb') as vocab_file: pickle.dump(vocab, vocab_file) return vocab UNKNOWN = 2 def make_data(split_points=(0.8, 0.94)): train_ratio, dev_ratio = split_points vocab = build_vocabulary() train_f = open(trainset_fn, 'wb') dev_f = open(devset_fn, 'wb') test_f = open(testset_fn, 'wb') try: for review in tqdm(read_reviews()): x = [] for sent in en(review['text']).sents: x.append([vocab.get(tok.orth_, UNKNOWN) for tok in sent]) y = review['stars'] r = random.random() if r < train_ratio: f = train_f elif r < dev_ratio: f = dev_f else: f = test_f pickle.dump((x, y), f) except KeyboardInterrupt: pass train_f.close() dev_f.close() test_f.close() if __name__ == '__main__': make_data() ================================================ FILE: kaggle-classification/.gitignore ================================================ # Directories to save model checkpoints runs/ model/* saved_models/* # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # Python virtual environment directory env/ # Don't version control data: this is the directory where data is downloaded to. 
local_data/ # Temporary directory for hacking stuff in tmp/ # Comet API key comet_api_key.txt ================================================ FILE: kaggle-classification/README.md ================================================ # Toxic Comment Classification Kaggle Challenge This directory is a place to play around with solutions for the [Toxic Comment Classification Kaggle challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge). The challenge was created by the Jigsaw Conversation AI team in December 2017 and it ends in February 2018. These models are meant to be simple baselines created independently from the Google infrastructure. ## To Run Locally 1. Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for the project (recommended, but technically optional). Python 2: ``` python -m virtualenv env ``` Python 3: ``` python3 -m venv env ``` In either case, enter your virtual env with: ```shell source env/bin/activate ``` 2. Install library dependencies: ```shell pip install -r requirements.txt ``` 3. For training locally, download the training (`train.csv`) and test (`test.csv`) data from the [Kaggle challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/data). If you have [a Kaggle API Key](https://github.com/Kaggle/kaggle-api#api-credentials) set up, you can use the [Kaggle api tool](https://github.com/Kaggle/kaggle-api) to download these files by running: ```shell kaggle competitions download -c jigsaw-toxic-comment-classification-challenge -p ./ mv jigsaw-toxic-comment-classification-challenge local_data for z in local_data/*.zip; do unzip -x $z -d local_data/; done ``` Note: the `kaggle` command is installed by `pip`, as specified in `requirements.txt`. 4. Run a model on a given class (e.g. 'toxic' or 'obscene'). There are examples of how to run the model locally and using ml-engine in `bin/run_local` and `bin/run` respectively.
#!/bin/bash
#
# A script to train the kaggle model remotely using ml-engine.
#
# To run with default hyperparameters from the kaggle-classification directory just enter:
# './bin/run'
#
# To run with hyperparameter tuning, enter:
# './bin/run -c hparam_config.yaml'
#
#
# Setup Steps:
# 1. Install the gcloud SDK
# 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]`
# 3. Put the train and test data in Cloud Storage, `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/`
#

# Edit these!
BUCKET_NAME=kaggle-model-experiments
CONFIG=gpu_config.yaml
JOB_NAME="${USER}_kaggle_training"
# Note: this must be a region that offers GPUs. us-central1 works.
# See: https://cloud.google.com/ml-engine/docs/using-gpus
REGION=us-central1

# Timestamp shared by the job name and both output locations so that they can
# be matched up afterwards.
DATE=$(date '+%Y%m%d_%H%M%S')
OUTPUT_PATH="gs://${BUCKET_NAME}/models/${USER}/${DATE}"

# Option parsing: -c <file> overrides the ml-engine config, -h prints usage.
while getopts :c:h opt; do
  case ${opt} in
    h)
      echo "Usage: run [-c config_filename.yaml]"
      echo "Flags: "
      echo -e " -c Specify a config file (e.g. use hparam_config to enable hyperparameter tuning)"
      exit 0;;
    c)
      echo "Using custom config ${OPTARG}"
      CONFIG="${OPTARG}";;
    :)
      echo "Error: ${OPTARG} requires an argument."
      echo "Use 'run -h' for help."
      exit 1;;
    \?)
      echo "Invalid flag. Use 'run -h' for help."
      exit 1;;
  esac
done

echo "Writing to $OUTPUT_PATH"

# Remote
# All expansions are quoted so that a config path or user name containing
# whitespace or glob characters is passed to gcloud as a single argument.
gcloud ml-engine jobs submit training "${JOB_NAME}_${DATE}" \
    --job-dir "${OUTPUT_PATH}" \
    --runtime-version 1.4 \
    --config "${CONFIG}" \
    --module-name trainer.model \
    --package-path trainer/ \
    --region "${REGION}" \
    --verbosity debug \
    -- \
    --train_data "gs://${BUCKET_NAME}/train.csv" \
    --y_class toxic \
    --train_steps 5000 \
    --saved_model_dir "gs://${BUCKET_NAME}/saved_graph/${USER}/${DATE}" \
    --model cnn

echo "You can view the tensorboard for this job with the command:"
echo ""
echo -e "\t tensorboard --logdir=${OUTPUT_PATH}"
echo ""
echo "And on your browser navigate to:"
echo ""
echo -e "\t http://localhost:6006/#scalars"
echo ""
echo "This will populate after a model checkpoint is saved."
echo ""
BUCKET_NAME=kaggle-model-experiments JOB_NAME=${USER}_kaggle_training REGION=us-east1 INPUT_PATH=gs://${BUCKET_NAME}/resources DATE=`date '+%Y%m%d_%H%M%S'` OUTPUT_PATH=gs://${BUCKET_NAME}/keras_runs/${USER}/${DATE} LOG_PATH=${OUTPUT_PATH}/logs/ HPARAM_CONFIG=keras_hparam_config.yaml COMET_KEY_FILE='comet_api_key.txt' COMET_KEY=$(cat ${COMET_KEY_FILE}) COMET_PROJECT_NAME='compare-models' echo "Writing to $OUTPUT_PATH" # Remote gcloud ml-engine jobs submit training ${JOB_NAME}_${DATE} \ --job-dir=$OUTPUT_PATH \ --runtime-version=1.8 \ --module-name=keras_trainer.model \ --package-path=keras_trainer \ --region=$REGION \ --verbosity=debug \ --config=${HPARAM_CONFIG} \ -- \ --train_path=${INPUT_PATH}/train.csv \ --test_path=${INPUT_PATH}/validation.csv \ --embeddings_path=${INPUT_PATH}/glove.6B/glove.6B.300d.txt \ --log_path=${LOG_PATH} \ --comet_key=${COMET_KEY} \ --comet_project_name=${COMET_PROJECT_NAME} \ --model_type=single_layer_cnn echo "You can view the tensorboard for this job with the command:" echo "" echo -e "\t tensorboard --logdir=${LOG_PATH}" echo "" echo "And on your browser navigate to:" echo "" echo -e "\t http://localhost:6006/#scalars" echo "" echo "This will populate after a model checkpoint is saved." echo "" ================================================ FILE: kaggle-classification/bin/run_keras_local.sh ================================================ #!/bin/bash DATE=`date '+%Y%m%d_%H%M%S'` OUTPUT_PATH=runs/${DATE} INPUT_PATH=local_data LOG_PATH=${OUTPUT_PATH}/logs/ COMET_KEY_FILE='comet_api_key.txt' COMET_KEY=$(cat ${COMET_KEY_FILE}) COMET_PROJECT_NAME='compare-models' echo "You can view the tensorboard for this job with the command:" echo "" echo -e "\t tensorboard --logdir=${LOG_PATH}" echo "" echo "And on your browser navigate to:" echo "" echo -e "\t http://localhost:6006/#scalars" echo "" echo "This will populate after a model checkpoint is saved." 
echo "" python -m keras_trainer.model \ --train_path=${INPUT_PATH}/train.csv \ --test_path=${INPUT_PATH}/validation.csv \ --embeddings_path=${INPUT_PATH}/glove.6B/glove.6B.100d.txt \ --job-dir=${OUTPUT_PATH} \ --log_path=${LOG_PATH} \ --comet_key=${COMET_KEY} \ --comet_project_name=${COMET_PROJECT_NAME} \ --model_type=rnn ================================================ FILE: kaggle-classification/bin/run_local ================================================ #!/bin/bash # # A script to train the kaggle model locally. # Assumes that train.csv and test.csv are downloaded into the local_data/ # directory. # DATE=`date '+%Y%m%d_%H%M%S'` gcloud ml-engine local train \ --module-name=trainer.model \ --package-path=trainer \ --job-dir=model/${DATE} -- \ --train_data=local_data/train.csv \ --y_class=toxic \ --train_steps=100 ================================================ FILE: kaggle-classification/bin/stream-logs ================================================ #!/bin/bash gcloud ml-engine jobs stream-logs $1 ================================================ FILE: kaggle-classification/config.yaml ================================================ trainingInput: ## BASIC_GPU uses single NVIDIA Tesla K80 GPU. scaleTier: BASIC_GPU ## Custom scaleTier needed for using > 1 GPU machines. # scaleTier: CUSTOM # masterType: complex_model_m_gpu # workerType: complex_model_m_gpu # parameterServerType: large_model # workerCount: 9 # parameterServerCount: 3 ================================================ FILE: kaggle-classification/gpu_config.yaml ================================================ trainingInput: ## BASIC_GPU uses single NVIDIA Tesla K80 GPU. scaleTier: BASIC_GPU ## Custom scaleTier needed for using > 1 GPU machines. 
# scaleTier: CUSTOM # masterType: complex_model_m_gpu # workerType: complex_model_m_gpu # parameterServerType: large_model # workerCount: 9 # parameterServerCount: 3 ================================================ FILE: kaggle-classification/hparam_config.yaml ================================================ trainingInput: ## BASIC_GPU uses single NVIDIA Tesla K80 GPU. scaleTier: BASIC_GPU ## Custom scaleTier needed for using > 1 GPU machines. # scaleTier: CUSTOM # masterType: complex_model_m_gpu # workerType: complex_model_m_gpu # parameterServerType: large_model # workerCount: 9 # parameterServerCount: 3 hyperparameters: goal: MAXIMIZE hyperparameterMetricTag: auc maxTrials: 100 maxParallelTrials: 10 enableTrialEarlyStopping: TRUE params: - parameterName: embedding_size type: INTEGER minValue: 50 maxValue: 200 scaleType: UNIT_LINEAR_SCALE - parameterName: num_filters type: INTEGER minValue: 10 maxValue: 200 scaleType: UNIT_LINEAR_SCALE - parameterName: dropout_keep_prob type: DOUBLE minValue: 0.5 maxValue: 1 scaleType: UNIT_LINEAR_SCALE - parameterName: learning_rate type: DOUBLE minValue: 0.000001 maxValue: 0.1 scaleType: UNIT_LOG_SCALE ================================================ FILE: kaggle-classification/keras_hparam_config.yaml ================================================ trainingInput: ## BASIC_GPU uses single NVIDIA Tesla K80 GPU. pythonVersion: '3.5' scaleTier: BASIC_GPU ## Custom scaleTier needed for using > 1 GPU machines. 
class BaseModel(metaclass=ABCMeta):
  """Abstract interface for the models used by the ModelRunner.

  A concrete model only has to provide `get_model`.
  """

  @abstractmethod
  def get_model(self) -> Model:
    """Return the Keras `Model`; every subclass must override this."""
    message = 'Method get_model needs to be implemented.'
    raise NotImplementedError(message)
class CNNWithAttention(base_model.BaseModel):
  """Multi-branch 1D CNN whose conv features are attention-weighted over time.

  One convolution branch is built per entry of `hparams.filter_sizes`; each
  branch gets its own attention weights, and the pooled branches are
  concatenated and fed to a dense classifier with one sigmoid output per label.

  hparams read by this class:
    embedding_dim
    vocab_size
    sequence_length
    dropout_rate
    train_embedding
    filter_sizes
    num_filters (indexed in parallel with filter_sizes)
    attention_intermediate_size

  NOTE: the model is compiled with the string optimizer 'rmsprop', so
  `hparams.learning_rate` is not used here.
  """

  def __init__(self, embeddings_matrix, hparams, labels):
    """Store the embedding weights, hyper-parameters and label names.

    Args:
      embeddings_matrix: initial weights passed to the Embedding layer.
      hparams: hyper-parameter object providing the attributes listed in the
        class docstring.
      labels: sequence of label names; its length sets the output width.
    """
    self.embeddings_matrix = embeddings_matrix
    self.hparams = hparams
    self.labels = labels
    self.num_labels = len(labels)

  def get_model(self):
    """Build, compile, print a summary of, and return the Keras model."""
    I = Input(shape=(self.hparams.sequence_length,), dtype='float32')
    E = Embedding(
        self.hparams.vocab_size,
        self.hparams.embedding_dim,
        weights=[self.embeddings_matrix],
        input_length=self.hparams.sequence_length,
        trainable=self.hparams.train_embedding)(
            I)
    # Per-branch outputs: C = conv features, A = attention weights,
    # P = attention-weighted, pooled features.
    C = []
    A = []
    P = []
    for i, size in enumerate(self.hparams.filter_sizes):
      C.append(
          Conv1D(
              self.hparams.num_filters[i],
              size,
              activation='relu',
              padding='same')(E))
      # Two dense layers reduce every time step to a single attention score.
      A.append(
          Dense(self.hparams.attention_intermediate_size, activation='relu')(
              C[i]))
      A[i] = Dense(1, use_bias=False)(A[i])
      # Permute trick: swap the last two axes so that the softmax runs across
      # the sequence positions instead of the size-1 score axis, then swap
      # back.
      A[i] = Permute((2, 1))(A[i])
      A[i] = Activation('softmax')(A[i])
      A[i] = Permute((2, 1))(A[i])
      # Weight the conv features by their attention score, then pool with a
      # window of `sequence_length`.
      P.append(Multiply()([A[i], C[i]]))
      P[i] = AveragePooling1D(
          self.hparams.sequence_length, padding='same')(
              P[i])
    X = Concatenate(axis=-1)(P)
    X = Flatten()(X)
    X = Dropout(self.hparams.dropout_rate)(X)
    X = Dense(128, activation='relu')(X)
    X = Dropout(self.hparams.dropout_rate)(X)
    # One independent sigmoid per label (multi-label classification).
    Output = Dense(self.num_labels, activation='sigmoid', name='outputs')(X)
    model = Model(inputs=I, outputs=Output)
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy', auc_roc])
    print(model.summary())
    return model
for v in metric_vars: tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v) # force update metric values with tf.control_dependencies([update_op]): value = tf.identity(value) return value ================================================ FILE: kaggle-classification/keras_trainer/model.py ================================================ """Classifiers for the Toxic Comment Classification Kaggle challenge, https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge To run locally: python keras-trainer/model.py """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse import json import numpy as np import pandas as pd import os import os.path from comet_ml import Experiment import tensorflow as tf from keras.callbacks import EarlyStopping from keras.callbacks import ModelCheckpoint from keras.callbacks import TensorBoard from keras.models import load_model from keras.preprocessing.sequence import pad_sequences from keras.preprocessing.text import Tokenizer from os.path import expanduser from sklearn import metrics from tensorflow.python.framework.errors_impl import NotFoundError from keras_trainer.cnn_with_attention import CNNWithAttention from keras_trainer.single_layer_cnn import SingleLayerCnn from keras_trainer.rnn import RNNModel from keras_trainer.custom_metrics import auc_roc from keras_trainer.base_model import BaseModel from typing import Dict, Type FLAGS = None TEMPORARY_MODEL_PATH = 'model.h5' VALID_MODELS = { 'cnn_with_attention': CNNWithAttention, 'single_layer_cnn': SingleLayerCnn, 'rnn': RNNModel } # type: Dict[str, Type[BaseModel]] DEFAULT_HPARAMS = tf.contrib.training.HParams( learning_rate=0.00005, dropout_rate=0.5, batch_size=128, epochs=1, sequence_length=250, embedding_dim=300, train_embedding=False, model_type='single_layer_cnn', filter_sizes=[3, 4, 5], num_filters=[128, 128, 128], attention_intermediate_size=128) class ModelRunner(): """Toxicity model using CNN + 
def score_metric(self, data, metric_name, metric_fn):
  """Prints per-label metric scores and returns their mean.

  Args:
    data: Dataset containing 'comment_text' column, that will be used to get
      predictions, as well as label columns to compare the predictions
      against.
    metric_name (str): String to use when printing.
    metric_fn: function that takes labels and predictions and outputs a score

  Returns:
    The mean of the per-label scores as a float (identical to the single
    score when there is only one label), or NaN when there are no labels.
    Earlier versions returned None, which made the value unusable for
    callers that log it (e.g. `experiment.log_metric('test_auc', ...)`).
  """
  predictions = self.predict(data['comment_text'])
  # Score every label separately: column `label_idx` of the predictions
  # corresponds to the `label_idx`-th entry of `self.labels`.
  agg = {}
  for label_idx, label in enumerate(self.labels):
    # Binarise the (possibly fractional) ground truth at 0.5.
    true_labels = list((data[label] > 0.5).astype(int))
    preds = predictions[:, label_idx]
    agg[label] = metric_fn(true_labels, preds)
  print('{}: {}'.format(metric_name, agg))
  if not agg:
    return float('nan')
  mean_score = float(np.mean(list(agg.values())))
  if len(agg) > 1:
    print('Mean {}: {}'.format(metric_name, mean_score))
  return mean_score


def score_auc(self, data):
  """Prints per-label ROC AUC and returns the mean over labels."""
  return self.score_metric(
      data, 'ROC AUC', lambda l, p: metrics.roc_auc_score(l, p, average=None))


def score_precision(self, data):
  """Prints per-label precision (threshold 0.5) and returns the mean."""
  return self.score_metric(
      data, 'Precision',
      lambda l, p: metrics.precision_score(l, (p > 0.5).astype(int)))


def score_recall(self, data):
  """Prints per-label recall (threshold 0.5) and returns the mean."""
  return self.score_metric(
      data, 'Recall',
      lambda l, p: metrics.recall_score(l, (p > 0.5).astype(int)))
np.zeros((self.hparams.vocab_size, self.hparams.embedding_dim)) with tf.gfile.Open(self.embeddings_path, 'r') as f: for line in f: values = line.split() word = values[0] if word in self.tokenizer.word_index: word_idx = self.tokenizer.word_index[word] word_embedding = np.asarray(values[1:], dtype='float32') embeddings_matrix[word_idx] = word_embedding embeddings_matrix[self.hparams.vocab_size - 1] = embeddings_matrix.mean( axis=0) return embeddings_matrix if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( '--train_path', type=str, default='local_data/train.csv', help='Path to the training data.') parser.add_argument( '--test_path', type=str, default='local_data/validation.csv', help='Path to the test data.') parser.add_argument( '--embeddings_path', type=str, default='local_data/glove.6B/glove.6B.100d.txt', help='Path to the embeddings.') parser.add_argument( '--job-dir', type=str, default='local_data/', help='Path to model file.') parser.add_argument( '--log_path', type=str, default='local_data/logs/', help='Path to write tensorboard logs.') parser.add_argument( '--comet_key', type=str, default=None, help='Path to file containing comet.ml api key. Set to None to disable comet.ml.' ) parser.add_argument( '--comet_project_name', type=str, default=None, help='Name of comet project that tracks results. Must be set if comet_key is.' ) parser.add_argument( '--labels', default='toxic,severe_toxic,obscene,threat,insult,identity_hate', help='A comma separated list of labels to predict.') parser.add_argument( '--model_type', default='single_layer_cnn', help='Model type. 
Valid choices are {}'.format(list(VALID_MODELS.keys()))) # Hyper-parameters parser.add_argument( '--learning_rate', type=float, default=0.00005, help='Learning rate.') parser.add_argument( '--dropout_rate', type=float, default=0.5, help='Dropout rate.') parser.add_argument('--batch_size', type=int, default=64, help='Batch size.') FLAGS = parser.parse_args() hparams = DEFAULT_HPARAMS hparams.learning_rate = FLAGS.learning_rate hparams.dropout_rate = FLAGS.dropout_rate hparams.batch_size = FLAGS.batch_size hparams.model_type = FLAGS.model_type if FLAGS.comet_key: experiment = Experiment( api_key=FLAGS.comet_key, project_name=FLAGS.comet_project_name, team_name='jigsaw', auto_param_logging=False, parse_args=False) experiment.log_multiple_params(hparams.values()) experiment.log_parameter('train_data_path', FLAGS.train_path) experiment.log_parameter('test_data_path', FLAGS.test_path) experiment.log_parameter('embeddings_path', FLAGS.embeddings_path) experiment.log_parameter('model_path', FLAGS.job_dir) experiment.log_parameter('model', hparams.model_type) # Used to scope logs to a given trial (when hyper param tuning) so that they # don't run over each other. When running locally it will just use the passed # in log path. 
class RNNModel(base_model.BaseModel):
  """Bidirectional GRU with attention pooling over the time steps.

  hparams read by this class:
    embedding_dim
    vocab_size
    sequence_length
    dropout_rate
    train_embedding

  NOTE: the model is compiled with the string optimizer 'rmsprop', so
  `hparams.learning_rate` is not used here.
  """

  def __init__(self, embeddings_matrix, hparams, labels):
    """Store the embedding weights, hyper-parameters and label names.

    Args:
      embeddings_matrix: initial weights passed to the Embedding layer.
      hparams: hyper-parameter object providing the attributes listed in the
        class docstring.
      labels: sequence of label names; its length sets the output width.
    """
    self.embeddings_matrix = embeddings_matrix
    self.hparams = hparams
    self.labels = labels
    self.num_labels = len(labels)

  def get_model(self):
    """Build, compile, print a summary of, and return the Keras model."""
    # Imported locally because this module's top-level import list does not
    # include these two layers.
    from keras.layers import Activation, Permute

    sequence_length = self.hparams.sequence_length

    I = Input(shape=(sequence_length,), dtype='float32')
    E = Embedding(
        self.hparams.vocab_size,
        self.hparams.embedding_dim,
        weights=[self.embeddings_matrix],
        input_length=sequence_length,
        trainable=self.hparams.train_embedding)(
            I)
    # 128 units per direction -> 256 features per time step.
    H = Bidirectional(GRU(128, return_sequences=True))(E)

    # Attention scores: one scalar per time step.
    A = TimeDistributed(
        Dense(128, activation='relu'), input_shape=(sequence_length, 256))(
            H)
    # Fix 1: feed the intermediate layer `A` (previously `H`, which silently
    # discarded the layer above).
    A = TimeDistributed(Dense(1))(A)
    # Fix 2: normalise across the time steps. A softmax on the size-1 score
    # axis always yields 1.0, i.e. no attention at all. Same permute trick as
    # CNNWithAttention: swap axes, softmax over the last axis, swap back.
    A = Permute((2, 1))(A)
    A = Activation('softmax')(A)
    A = Permute((2, 1))(A)

    # Attention-weighted sum of the GRU states over the time axis.
    X = Dot((1, 1))([H, A])
    X = Flatten()(X)
    X = Dense(128, activation='relu')(X)
    X = Dropout(self.hparams.dropout_rate)(X)
    # One independent sigmoid per label (multi-label classification).
    Output = Dense(self.num_labels, activation='sigmoid')(X)

    model = Model(inputs=I, outputs=Output)
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy', auc_roc])

    print(model.summary())
    return model
MaxPooling1D(self.hparams.sequence_length, padding='same')(X3) X = Concatenate(axis=-1)([X5, X4, X3]) X = Flatten()(X) X = Dropout(self.hparams.dropout_rate)(X) X = Dense(128, activation='relu')(X) X = Dropout(self.hparams.dropout_rate)(X) Output = Dense(self.num_labels, activation='sigmoid', name='outputs')(X) model = Model(inputs=I, outputs=Output) model.compile( optimizer=Adam(lr=self.hparams.learning_rate), loss='binary_crossentropy', metrics=['accuracy', auc_roc]) print(model.summary()) return model ================================================ FILE: kaggle-classification/requirements.txt ================================================ absl-py==0.1.9 astor==0.6.2 bleach==3.3.0 certifi==2024.7.4 chardet==3.0.4 comet-ml==1.0.8 enum34==1.1.6 futures==3.1.1 gast==0.2.0 grpcio==1.53.2 h5py==2.7.1 html5lib==0.999999999 idna==3.7 kaggle==1.0.5 Keras==2.13.1 Markdown==2.6.11 mypy==0.600 nltk==3.9 numpy==1.22.0 pandas==0.22.0 Pillow==10.3.0 protobuf==3.18.3 python-dateutil==2.6.1 pytz==2017.3 PyYAML==5.4 requests==2.32.2 scikit-learn==1.5.0 scipy==1.10.0 six==1.11.0 sklearn==0.0 tensorboard==1.8.0 tensorflow==2.12.1 tensorflow-tensorboard==1.5.1 termcolor==1.1.0 tflearn==0.3.2 typed-ast==1.1.0 urllib3==1.26.18 websocket-client==0.47.0 Werkzeug==3.0.3 wurlitzer==1.0.2 ================================================ FILE: kaggle-classification/setup.py ================================================ from setuptools import find_packages from setuptools import setup REQUIRED_PACKAGES = [ 'tflearn>=0.3.2', 'Keras==2.13.1', 'h5py==2.7.1', 'comet-ml==1.0.8', 'nltk>=3.3' ] setup( name='trainer', version='0.1', install_requires=REQUIRED_PACKAGES, packages=find_packages(), include_package_data=True, description='tflearn.') setup( name='keras_trainer', version='0.1', install_requires=REQUIRED_PACKAGES, packages=find_packages(), include_package_data=True, description='tflearn.') setup( name='tf_trainer', version='0.1', install_requires=REQUIRED_PACKAGES, 
packages=find_packages(), include_package_data=True, description='tflearn.') ================================================ FILE: kaggle-classification/trainer/__init__.py ================================================ ================================================ FILE: kaggle-classification/trainer/model.py ================================================ """Classifiers for the Toxic Comment Classification Kaggle challenge, https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge To run locally: python trainer/model.py --train_data=train.csv --predict_data=test.csv --y_class=toxic To run locally using Cloud ML Engine: gcloud ml-engine local train \ --module-name=trainer.model \ --package-path=trainer \ --job-dir=model -- \ --train_data=train.csv \ --predict_data=test.csv \ --y_class=toxic \ --train_steps=100 To run TensorBoard locally: tensorboard --logdir=model/ Then visit http://localhost:6006/ to see the dashboard. """ from __future__ import print_function from __future__ import division import argparse import os import sys import pandas as pd import tensorflow as tf from sklearn import metrics from trainer import wikidata from collections import namedtuple from tensorflow.contrib.training.python.training import hparam FLAGS = None # Data Params TRAIN_PERCENT = .8 # Percent of data to allocate to training DATA_SEED = 48173 # Random seed used for splitting the data into train/test MAX_LABEL = 2 MAX_DOCUMENT_LENGTH = 500 # Max length of each comment in words # CNN parameters DEFAULT_FILTER_SIZES = [2, 3, 4, 5] # Bag of Word parameters BOWParams = namedtuple("BOWParams", ["EMBEDDING_SIZE"]) BOW_PARAMS = BOWParams(EMBEDDING_SIZE=20) WORDS_FEATURE = "words" # Name of the input words feature. 
def estimator_spec_for_softmax_classification(logits, labels, mode,
                                              learning_rate):
  """Build the EstimatorSpec for a softmax classifier in the given mode.

  Depending on the value of mode, different EstimatorSpec arguments are
  required.

  For mode == ModeKeys.TRAIN: required fields are loss and train_op.
  For mode == ModeKeys.EVAL: required field is loss.
  For mode == ModeKeys.PREDICT: required fields are predictions.

  Args:
    logits: unnormalised class scores; classes are indexed along axis 1.
    labels: integer class ids (unused in PREDICT mode).
    mode: a tf.estimator.ModeKeys value.
    learning_rate: learning rate for the Adam optimizer (TRAIN mode only).

  Returns:
    EstimatorSpec instance for softmax classification.
  """
  predicted_classes = tf.argmax(logits, axis=1)
  predicted_probs = tf.nn.softmax(logits, name="softmax_tensor")

  predictions = {
      # Holds the raw logit values
      "logits": logits,

      # Holds the class id (0,1) representing the model's prediction of the
      # most likely class for this example.
      "classes": predicted_classes,

      # Holds the probabilities for each prediction
      "probs": predicted_probs,
  }

  # Represents an output of a model that can be served.
  export_outputs = {
      "output": tf.estimator.export.ClassificationOutput(scores=predicted_probs)
  }

  # PREDICT Mode
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions=predictions, export_outputs=export_outputs)

  # Calculate loss for both TRAIN and EVAL modes
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  # AUC must be computed from a continuous score. Feeding the hard argmax
  # classes collapses the ROC curve to a single operating point. Use the
  # probability of the positive class (index 1) instead.
  # NOTE: this assumes binary classification (MAX_LABEL = 2 in this file).
  positive_probs = predicted_probs[:, 1]

  eval_metric_ops = {
      "accuracy":
          tf.metrics.accuracy(
              labels=labels, predictions=predicted_classes, name="acc_op"),
      "auc":
          tf.metrics.auc(
              labels=labels, predictions=positive_probs, name="auc_op"),
  }

  # Add summary ops to the graph. These metrics will be tracked and graphed
  # on each checkpoint by TensorBoard.
  tf.summary.scalar("accuracy", eval_metric_ops["accuracy"][1])
  tf.summary.scalar("auc", eval_metric_ops["auc"][1])

  # TRAIN Mode
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    logging_hook = tf.train.LoggingTensorHook(
        tensors={"loss": loss}, every_n_iter=50)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        training_hooks=[logging_hook],
        predictions={"loss": loss},
        export_outputs=export_outputs,
        eval_metric_ops=eval_metric_ops)

  # EVAL Mode
  assert mode == tf.estimator.ModeKeys.EVAL

  return tf.estimator.EstimatorSpec(
      mode=mode,
      loss=loss,
      predictions=predictions,
      eval_metric_ops=eval_metric_ops,
      export_outputs=export_outputs)
pooled = tf.nn.max_pool( hh, ksize=[1, MAX_DOCUMENT_LENGTH - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID", name="pool") pooled_outputs.append(pooled) # Combine all the pooled features num_filters_total = num_filters * len(filter_sizes) h_pool = tf.concat(pooled_outputs, 3) h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total]) # Add dropout in training with tf.name_scope("dropout"): # Set dropout rate to 1 (disable dropout) by default h_drop = tf.nn.dropout(h_pool_flat, 1.0) if mode == tf.estimator.ModeKeys.TRAIN: h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob) # Add a fully connected layer to do prediction with tf.name_scope("output"): W = tf.Variable( tf.truncated_normal([num_filters_total, MAX_LABEL], stddev=0.1), name="W") b = tf.Variable(tf.constant(0.1, shape=[MAX_LABEL]), name="b") scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores") return estimator_spec_for_softmax_classification( logits=scores, labels=labels, mode=mode, learning_rate=FLAGS.learning_rate) return cnn_model def bag_of_words_model(features, labels, mode): """A bag-of-words model using a learned word embedding. Note it disregards the word order in the text. Returns a tf.estimator.EstimatorSpec. """ bow_column = tf.feature_column.categorical_column_with_identity( WORDS_FEATURE, num_buckets=n_words) # The embedding values are initialized randomly, and are trained along with # all other model parameters to minimize the training loss. 
bow_embedding_column = tf.feature_column.embedding_column( bow_column, dimension=BOW_PARAMS.EMBEDDING_SIZE) bow = tf.feature_column.input_layer( features, feature_columns=[bow_embedding_column]) logits = tf.layers.dense(bow, MAX_LABEL, activation=None) return estimator_spec_for_softmax_classification( logits=logits, labels=labels, mode=mode, learning_rate=FLAGS.learning_rate) def main(FLAGS): global n_words tf.logging.set_verbosity(tf.logging.INFO) if FLAGS.verbose: tf.logging.info("Running in verbose mode") tf.logging.set_verbosity(tf.logging.DEBUG) # Load and split data tf.logging.info("Loading data from {0}".format(FLAGS.train_data)) data = wikidata.WikiData( FLAGS.train_data, FLAGS.y_class, seed=DATA_SEED, train_percent=TRAIN_PERCENT, max_document_length=MAX_DOCUMENT_LENGTH, char_ngrams=FLAGS.char_ngrams, min_frequency=FLAGS.min_frequency) n_words = len(data.vocab_processor.vocabulary_) tf.logging.info("Total words: %d" % n_words) # Build model if FLAGS.model == "bag_of_words": model_fn = bag_of_words_model # Subtract 1 because VocabularyProcessor outputs a word-id matrix where word # ids start from 1 and 0 means 'no word'. But categorical_column_with_identity # assumes 0-based count and uses -1 for missing word. data.x_train = data.x_train - 1 data.x_test = data.x_test - 1 elif FLAGS.model == "cnn": model_fn = get_cnn_model(FLAGS.embedding_size, FLAGS.num_filters, FLAGS.dropout_keep_prob) else: tf.logging.error("Unknown specified model '{}', must be one of {}".format( FLAGS.model, MODEL_LIST)) raise ValueError classifier = tf.estimator.Estimator( model_fn=model_fn, config=tf.contrib.learn.RunConfig( tf_random_seed=TRAIN_SEED, ## Uncomment to see CPU/GPU allocation in logs. 
# session_config=tf.ConfigProto(log_device_placement=True), ), model_dir=FLAGS.job_dir) # Train model train_input_fn = tf.estimator.inputs.numpy_input_fn( x={WORDS_FEATURE: data.x_train}, y=data.y_train, batch_size=BATCH_SIZE, num_epochs=None, # Note: For training, set this to None, so the input_fn # keeps returning data until the required number of train # steps is reached. shuffle=True) classifier.train(input_fn=train_input_fn, steps=FLAGS.train_steps) # Predict on held-out test data test_input_fn = tf.estimator.inputs.numpy_input_fn( x={WORDS_FEATURE: data.x_test}, y=data.y_test, num_epochs=1, # Note: For evaluation and prediction set this to 1, # so the input_fn will iterate over the data once and # then raise OutOfRangeError shuffle=False) predicted_test = classifier.predict(input_fn=test_input_fn) test_out = pd.DataFrame( [(p["classes"], p["probs"][1]) for p in predicted_test], columns=["y_predicted", "prob"]) # Score with sklearn and TensorFlow sklearn_score = metrics.accuracy_score(data.y_test, test_out["y_predicted"]) tf_scores = classifier.evaluate(input_fn=test_input_fn) train_size = len(data.x_train) test_size = len(data.x_test) baseline = len(data.y_train[data.y_train == 0]) / len(data.y_train) if baseline < .5: baseline = 1 - baseline tf.logging.info("") tf.logging.info("----------Evaluation on Held-Out Data---------") tf.logging.info("Train Size: {0} Test Size: {1}".format( train_size, test_size)) tf.logging.info("Baseline (class distribution): {0:f}".format(baseline)) tf.logging.info("Accuracy (sklearn): {0:f}".format(sklearn_score)) for key in sorted(tf_scores): tf.logging.info("%s: %s" % (key, tf_scores[key])) # Export the model feature_spec = { WORDS_FEATURE: tf.FixedLenFeature(dtype=tf.int64, shape=MAX_DOCUMENT_LENGTH) } serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn( feature_spec) classifier.export_savedmodel(FLAGS.job_dir, serving_input_fn) if __name__ == "__main__": parser = argparse.ArgumentParser() 
parser.add_argument( "--verbose", help="Run in verbose mode.", action="store_true") parser.add_argument( "--train_data", type=str, default="", help="Path to the training data.") parser.add_argument( "--y_class", type=str, default="toxic", help="Class to train model against, one of cnn, bag_of_words") parser.add_argument( "--model", type=str, default="bag_of_words", help="The model to train, one of {}".format(MODEL_LIST)) parser.add_argument( "--train_steps", type=int, default=100, help="The number of steps to train the model") parser.add_argument( "--embedding_size", type=int, default=50, help="The size of the word embedding") parser.add_argument( "--dropout_keep_prob", type=float, default=0.75, help="The dropout keep probability") parser.add_argument( "--num_filters", type=int, default=10, help="The number of filters in each size") parser.add_argument( "--job-dir", type=str, default="", help="The directory where the job is staged") parser.add_argument( "--char_ngrams", type=int, default=0, help="Size of overlapping character ngrams to split into, use words if 0") parser.add_argument( "--learning_rate", type=float, default=0.01, help="The model learning rate") parser.add_argument( "--min_frequency", type=int, default=0, help="Minimum count for tokens passed to VocabularyProcessor") FLAGS = parser.parse_args() main(FLAGS) ================================================ FILE: kaggle-classification/trainer/wikidata.py ================================================ """Class to encapsulate training and test data.""" import numpy as np import pandas as pd import tensorflow as tf import tflearn from sklearn.model_selection import train_test_split Y_CLASSES = [ 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate' ] def ngrams(sentence, ngram_size): """Converts a string into a list of ngrams of characters. ngrams('abra cadabra', 5) = [('a', 'b', 'r', 'a', ' '), ('b', 'r', 'a', ' ', 'c'), ... 
('a', 'd', 'a', 'b', 'r'), ('d', 'a', 'b', 'r', 'a')] """ chars = list(sentence) return zip(*[chars[i:] for i in range(ngram_size)]) class WikiData: def __init__(self, data_path, y_class, max_document_length, vocab_processor_path=None, test_mode=False, seed=None, train_percent=None, char_ngrams=None, min_frequency=None): """Args: * data_path (string): path to file containing train or test data * y_class (string): the class we're training or testing on * vocab_processor_path (string): if provided, the comment_text data will be processed with the vocab processor at that location. If not, a new vocab_processor will be created using the training data. * test_mode (boolean): true if loading data just to test on, not training a model * seed (integer): a random seed to use for data splitting * train_percent (fload): the percent of data we should use for training data """ data = self._load_csv(data_path) self.x_train, self.x_train_text = None, None self.x_test, self.x_test_text = None, None self.y_train = None self.y_test = None self.vocab_processor = None # If test_mode is True, then put all the data in x_test and y_test if test_mode: train_percent = 0 # Split the data into test / train sets self.x_train_text, self.x_test_text, self.y_train, self.y_test \ = self._split(data, train_percent, 'comment_text', y_class, seed) # Either load a VocabularyProcessor or compute one from the training data if test_mode: # If test_mode is True and no vocab_processor_path is specified, then # return an error. We shouldn't train a VocabProcessor at test time. 
if vocab_processor_path is None: tf.logging.error( 'Loading data in test_mode with no vocab_processor_path') raise ValueError self.vocab_processor = self.load_vocab_processor(vocab_processor_path) else: tokenizer_fn = None if char_ngrams: tokenizer_fn = lambda iterator: ( ngrams(x, char_ngrams) for x in iterator) self.vocab_processor = tflearn.data_utils.VocabularyProcessor( max_document_length=max_document_length, min_frequency=min_frequency, tokenizer_fn=tokenizer_fn) self.x_train = np.array( list(self.vocab_processor.fit_transform(self.x_train_text))) # Apply the VocabularyProcessor to the test data self.x_test = np.array( list(self.vocab_processor.transform(self.x_test_text))) def _load_vocab_processor(self, path): """Load a VocabularyProcessor from the provided path""" return tflearn.data_utils.VocabularyProcessor.restore(path) def _load_csv(self, path): """Reads CSV from specified location and returns the data as a Pandas Dataframe. Will work with a Cloud Storage path, e.g. 'gs:///' or a local path. Assumes data can fit into memory. """ with tf.gfile.Open(path, 'rb') as fileobj: df = pd.read_csv(fileobj, encoding='utf-8') return df def _split(self, data, train_percent, x_field, y_class, seed=None): """Split divides the Wikipedia data into test and train subsets. Args: * data (dataframe): a dataframe with data for 'comment_text' and y_class * train_percent (float): the fraction of data to use for training * x_field (string): attribute of the wiki data to use to train, e.g. 'comment_text' * y_class (string): attribute of the wiki data to predict, e.g. 
'toxic' * seed (integer): a seed to use to split the data in a reproducible way Returns: x_train (dataframe): a pandas series with the text from each train example y_train (dataframe): the 0 or 1 labels for the training data x_test (dataframe): a pandas series with the text from each test example y_test (dataframe): the 0 or 1 labels for the test data """ if y_class not in Y_CLASSES: tf.logging.error('Specified y_class {0} not in list of possible classes {1}'\ .format(y_class, Y_CLASSES)) raise ValueError if train_percent > 1 or train_percent < 0: tf.logging.error('Specified train_percent {0} is not between 0 and 1'\ .format(train_percent)) raise ValueError X = data[x_field] y = data[y_class] x_train, x_test, y_train, y_test = train_test_split( X, y, test_size=1 - train_percent, random_state=seed) return x_train, x_test, np.array(y_train), np.array(y_test) ================================================ FILE: model_evaluation/BiosBias Evaluation.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "\n", "from IPython.display import display\n", "import json\n", "import numpy as np\n", "import pandas as pd\n", "import os\n", "import random\n", "import re\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import sklearn.metrics as metrics\n", "import tensorflow as tf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read scored test data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "standard_data_path = 'gs://conversationai-models/biosbias/scored_data/test_standard_0409.csv'\n", "scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_scrubbed_0409.csv'\n", 
"very_scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_very_scrubbed_0409.csv'\n", "gender_data_path = 'gs://conversationai-models/biosbias/scored_data/test_data_gender.csv'\n", "\n", "\n", "perf_df = pd.read_csv(tf.gfile.Open(standard_data_path)).drop_duplicates(subset=['tokens'])\n", "scrubbed_df = pd.read_csv(tf.gfile.Open(scrubbed_data_path)).drop_duplicates(subset=['tokens'])\n", "very_scrubbed_df = pd.read_csv(tf.gfile.Open(very_scrubbed_data_path)).drop_duplicates(subset=['tokens'])\n", "gender_df = pd.read_csv(tf.gfile.Open(gender_data_path)).drop_duplicates(subset=['tokens'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(59824, 300)\n", "(59820, 36)\n" ] } ], "source": [ "print(perf_df.shape)\n", "print(scrubbed_df.shape)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df = perf_df.join(scrubbed_df, rsuffix = '_scrubbed')\n", "df = df.join(very_scrubbed_df, rsuffix = '_very_scrubbed')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tokensgenderlabeltf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6...tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32
0[u'he', u'is', u'currently', u'working', u'clo...M250.0000084.625991e-140.0000890.0004322.642943e-041.613340e-074.687537e-07...0.0019291.914383e-060.0000970.0003327.086468e-078.798547e-160.0000410.0003950.0000548.315536e-08
1[u'she', u'has', u'a', u'passion', u'for', u'w...F260.0000015.970340e-180.0000040.0001558.439872e-061.380430e-078.653511e-09...0.0133567.866625e-010.0092690.0242643.710595e-042.425320e-110.0044880.0024260.0324671.274749e-04
2[u'growing', u'up', u'under', u'the', u'influe...M220.0002051.023775e-150.0080200.0000541.159827e-062.420847e-064.043094e-06...0.0001358.046401e-040.0021730.0006973.003297e-058.979249e-140.0019010.0000970.0017274.318769e-06
3[u'he', u'earned', u'his', u'beng', u'degree',...M250.0000091.354895e-130.0015080.0000511.071294e-071.333064e-081.857020e-05...0.0092171.700057e-020.1360350.0095812.460610e-031.396903e-090.0022760.0098110.0268411.840305e-04
4[u'her', u'professional', u'and', u'educationa...F250.0010346.887217e-120.0007010.0211891.852501e-036.723991e-057.880444e-06...0.0004259.174340e-080.9951510.0016359.952086e-114.422046e-140.0009740.0000390.0004821.483144e-07
\n", "

5 rows × 372 columns

\n", "
" ], "text/plain": [ " tokens gender label \\\n", "0 [u'he', u'is', u'currently', u'working', u'clo... M 25 \n", "1 [u'she', u'has', u'a', u'passion', u'for', u'w... F 26 \n", "2 [u'growing', u'up', u'under', u'the', u'influe... M 22 \n", "3 [u'he', u'earned', u'his', u'beng', u'degree',... M 25 \n", "4 [u'her', u'professional', u'and', u'educationa... F 25 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0 \\\n", "0 0.000008 \n", "1 0.000001 \n", "2 0.000205 \n", "3 0.000009 \n", "4 0.001034 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1 \\\n", "0 4.625991e-14 \n", "1 5.970340e-18 \n", "2 1.023775e-15 \n", "3 1.354895e-13 \n", "4 6.887217e-12 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2 \\\n", "0 0.000089 \n", "1 0.000004 \n", "2 0.008020 \n", "3 0.001508 \n", "4 0.000701 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3 \\\n", "0 0.000432 \n", "1 0.000155 \n", "2 0.000054 \n", "3 0.000051 \n", "4 0.021189 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4 \\\n", "0 2.642943e-04 \n", "1 8.439872e-06 \n", "2 1.159827e-06 \n", "3 1.071294e-07 \n", "4 1.852501e-03 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5 \\\n", "0 1.613340e-07 \n", "1 1.380430e-07 \n", "2 2.420847e-06 \n", "3 1.333064e-08 \n", "4 6.723991e-05 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6 \\\n", "0 4.687537e-07 \n", "1 8.653511e-09 \n", "2 4.043094e-06 \n", "3 1.857020e-05 \n", "4 7.880444e-06 \n", "\n", " ... \\\n", "0 ... \n", "1 ... \n", "2 ... \n", "3 ... \n", "4 ... 
\n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23 \\\n", "0 0.001929 \n", "1 0.013356 \n", "2 0.000135 \n", "3 0.009217 \n", "4 0.000425 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24 \\\n", "0 1.914383e-06 \n", "1 7.866625e-01 \n", "2 8.046401e-04 \n", "3 1.700057e-02 \n", "4 9.174340e-08 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25 \\\n", "0 0.000097 \n", "1 0.009269 \n", "2 0.002173 \n", "3 0.136035 \n", "4 0.995151 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26 \\\n", "0 0.000332 \n", "1 0.024264 \n", "2 0.000697 \n", "3 0.009581 \n", "4 0.001635 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27 \\\n", "0 7.086468e-07 \n", "1 3.710595e-04 \n", "2 3.003297e-05 \n", "3 2.460610e-03 \n", "4 9.952086e-11 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28 \\\n", "0 8.798547e-16 \n", "1 2.425320e-11 \n", "2 8.979249e-14 \n", "3 1.396903e-09 \n", "4 4.422046e-14 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29 \\\n", "0 0.000041 \n", "1 0.004488 \n", "2 0.001901 \n", "3 0.002276 \n", "4 0.000974 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30 \\\n", "0 0.000395 \n", "1 0.002426 \n", "2 0.000097 \n", "3 0.009811 \n", "4 0.000039 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31 \\\n", "0 0.000054 \n", "1 0.032467 \n", "2 0.001727 \n", "3 0.026841 \n", "4 0.000482 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32 \n", "0 8.315536e-08 \n", "1 1.274749e-04 \n", "2 4.318769e-06 \n", "3 1.840305e-04 \n", "4 1.483144e-07 \n", "\n", "[5 rows x 372 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", 
"execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(59824, 372)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(59753, 372)\n" ] } ], "source": [ "df = df.dropna()\n", "print(df.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Preprocessing" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def get_class_from_col_name(col_name):\n", " #print(col_name)\n", " pattern = r'^.*_(\\d+)$'\n", " return int(re.search(pattern, col_name).group(1))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def find_best_class(df, model_name, class_names):\n", " model_class_names = ['{}_{}'.format(model_name, class_name) for class_name in class_names]\n", " sub_df = df[model_class_names]\n", " df['{}_class'.format(model_name)] = sub_df.idxmax(axis=1).apply(get_class_from_col_name)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "array(['tokens', 'gender', 'label',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_7',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_30',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_20',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_10',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_0',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_23',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_13',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_3',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_26',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_16',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_6',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_29',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_32',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_19',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_32',\n", " 'tokens_scrubbed', 'gender_scrubbed', 'label_scrubbed',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_9',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_22',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_32',\n", " 'tokens_very_scrubbed', 'gender_very_scrubbed',\n", " 
'label_very_scrubbed',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_0',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_5',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_6',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_7',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_8',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_9',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_10',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_11',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_12',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_13',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_14',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_15',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_16',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_17',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_18',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_19',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_20',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_21',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_22',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32'],\n", " dtype=object)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Can check model names here\n", "# df.columns.values" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# May have to change.\n", "# Can look them up in experiment tracker.\n", "MODEL_NAMES = {\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837': 'debiased_tolga',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941': 'debiased_biosbias',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003': 'strong_debiased_1',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019': 'strong_debiased_2',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034': 'strong_debiased_3',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055': 'strong_debiased_4',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117': 'glove',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113': 'strong_no_equalize',\n", " 
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131': 'strong_no_projection', \n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954': 'scrubbed',\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254': 'very_scrubbed'\n", "}" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "CLASS_NAMES = range(33)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "for _model in MODEL_NAMES:\n", " find_best_class(df, _model, CLASS_NAMES)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Labels with either gender having too few examples\n", "bad_labels = df.groupby('label').gender.value_counts().reset_index(name = 'count').query('count < 5').label.values\n", "assert len(bad_labels) == 0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Accuracy Calculation" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy for model debiased_biosbias: 0.806972034877\n", "Accuracy for model very_scrubbed: 0.355915184175\n", "Accuracy for model debiased_tolga: 0.818921225713\n", "Accuracy for model strong_debiased_1: 0.817984034274\n", "Accuracy for model strong_no_projection: 0.806687530333\n", "Accuracy for model strong_debiased_2: 0.81733134738\n", "Accuracy for model strong_no_equalize: 0.815239402206\n", "Accuracy for model glove: 0.817950563152\n", "Accuracy for model strong_debiased_4: 0.814737335364\n", "Accuracy for model strong_debiased_3: 0.817599116362\n", "Accuracy for model scrubbed: 0.130503907754\n" ] } ], "source": [ "accuracy_list = []\n", "for _model in MODEL_NAMES:\n", " is_correct = (df['{}_class'.format(_model)] == df['label'])\n", " _acc = sum(is_correct)/len(is_correct)\n", " accuracy_list.append(_acc)\n", " print ('Accuracy for model {}: 
{}'.format(MODEL_NAMES[_model], _acc))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Fairness Metrics" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "for _class in CLASS_NAMES:\n", " df['label_{}'.format(_class)] = (df['label'] == _class)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# Gender ratios of classes\n", "gender_counts = df.groupby('label').gender.value_counts().reset_index(name = 'count')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "def frac_female(df):\n", " m_count = df[df['gender'] == \"M\"]['count'].values[0]\n", " f_count = df[df['gender'] == \"F\"]['count'].values[0]\n", " return {'label': df['label'].values[0], 'frac_female': f_count/(m_count+f_count)}" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "frac_female_df = pd.DataFrame(list(gender_counts.groupby('label', as_index = False).apply(frac_female)))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "def compute_tpr(df, _class, _model, threshold = 0.5):\n", " tpr = metrics.recall_score(df['label_{}'.format(_class)],\n", " df['{}_{}'.format(_model,_class)] > threshold)\n", " return tpr\n", " \n", "def compute_tpr_by_gender(df, _class, _model, threshold = 0.5):\n", " tpr_m = compute_tpr(df.query('gender == \"M\"'), _class, _model, threshold)\n", " tpr_f = compute_tpr(df.query('gender == \"F\"'), _class, _model, threshold)\n", " return {'M': tpr_m, 'F': tpr_f}" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "def compute_tpr_tnr(df, _class, _model, threshold = 0.5):\n", " #cm = metrics.confusion_matrix(df['label_{}'.format(_class)],\n", " # df['{}_{}'.format(_model,_class)] > threshold)\n", " cm = pd.crosstab(df['label_{}'.format(_class)], df['{}_{}'.format(_model,_class)] > threshold)\n", " 
#display(cm)\n", " if cm.shape[0] > 1:\n", " tn = cm.iloc[0,0]\n", " fp = cm.iloc[0,1]\n", " fn = cm.iloc[1,0]\n", " tp = cm.iloc[1,1]\n", " tpr = tp/(tp+fn)\n", " tnr = tn/(tn+fp)\n", " else:\n", " tpr = 0\n", " tnr = 1\n", " return tpr, tnr\n", "\n", "def compute_tr_by_gender(df, _class, _model, threshold = 0.5):\n", " tpr_m, tnr_m = compute_tpr_tnr(df.query('gender == \"M\"'), _class, _model, threshold)\n", " tpr_f, tnr_f = compute_tpr_tnr(df.query('gender == \"F\"'), _class, _model, threshold)\n", " return {'TPR_m': tpr_m, 'TPR_f': tpr_f, 'TNR_m': tnr_m, 'TNR_f': tnr_f}" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "for _class in CLASS_NAMES:\n", " for _model in MODEL_NAMES:\n", " tpr_1 = compute_tpr(df, _class, _model)\n", " tpr_2, _ = compute_tpr_tnr(df, _class, _model)\n", " assert tpr_1 == tpr_2, '{} != {}'.format(tpr_1, tpr_2)\n", " #print('{} == {}'.format(tpr_1, tpr_2))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "tpr_df = pd.DataFrame()\n", "for _class in frac_female_df.label:\n", " row = {}\n", " row['label'] = _class\n", " for _model, _model_type in MODEL_NAMES.items():\n", " tpr, tnr = compute_tpr_tnr(df, _class, _model)\n", " row['{}_tpr'.format(_model_type)] = tpr\n", " row['{}_tnr'.format(_model_type)] = tnr\n", " gender_trs = compute_tr_by_gender(df, _class, _model)\n", " row['{}_tpr_F'.format(_model_type)] = gender_trs['TPR_f']\n", " row['{}_tpr_M'.format(_model_type)] = gender_trs['TPR_m']\n", " row['{}_tpr_gender_gap'.format(_model_type)] = gender_trs['TPR_f'] - gender_trs['TPR_m']\n", " row['{}_tnr_F'.format(_model_type)] = gender_trs['TNR_f']\n", " row['{}_tnr_M'.format(_model_type)] = gender_trs['TNR_m']\n", " row['{}_tnr_gender_gap'.format(_model_type)] = gender_trs['TNR_f'] - gender_trs['TNR_m']\n", " tpr_df = tpr_df.append(row, ignore_index = True)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": 
[ "results_df = pd.merge(tpr_df, frac_female_df, on = 'label')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "TITLE_LABELS = [\n", " 'accountant', 'acupuncturist', 'architect', 'attorney', 'chiropractor', 'comedian', 'composer', 'dentist',\n", " 'dietitian', 'dj', 'filmmaker', 'interior_designer', 'journalist', 'landscape_architect', 'magician',\n", " 'massage_therapist', 'model', 'nurse', 'painter', 'paralegal', 'pastor', 'personal_trainer',\n", " 'photographer', 'physician', 'poet', 'professor', 'psychologist', 'rapper',\n", " 'real_estate_broker', 'software_engineer', 'surgeon', 'teacher', 'yoga_teacher']" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "results_df['label_profession'] = results_df['label'].apply(lambda x: TITLE_LABELS[int(x)])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
frac_female
frac_female1.000000
debiased_biosbias_tpr_gender_gap0.829982
very_scrubbed_tpr_gender_gap0.458378
debiased_tolga_tpr_gender_gap0.824882
strong_debiased_1_tpr_gender_gap0.716922
strong_no_projection_tpr_gender_gap0.709000
strong_debiased_2_tpr_gender_gap0.596896
strong_no_equalize_tpr_gender_gap0.772645
glove_tpr_gender_gap0.794059
strong_debiased_4_tpr_gender_gap0.550435
strong_debiased_3_tpr_gender_gap0.707174
scrubbed_tpr_gender_gap-0.282919
\n", "
" ], "text/plain": [ " frac_female\n", "frac_female 1.000000\n", "debiased_biosbias_tpr_gender_gap 0.829982\n", "very_scrubbed_tpr_gender_gap 0.458378\n", "debiased_tolga_tpr_gender_gap 0.824882\n", "strong_debiased_1_tpr_gender_gap 0.716922\n", "strong_no_projection_tpr_gender_gap 0.709000\n", "strong_debiased_2_tpr_gender_gap 0.596896\n", "strong_no_equalize_tpr_gender_gap 0.772645\n", "glove_tpr_gender_gap 0.794059\n", "strong_debiased_4_tpr_gender_gap 0.550435\n", "strong_debiased_3_tpr_gender_gap 0.707174\n", "scrubbed_tpr_gender_gap -0.282919" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results_df[['frac_female']+['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]].corr()[['frac_female']]\n", " " ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "tpr_gender_gap_cols = ['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\n", "tnr_gender_gap_cols = ['{}_tnr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "gender_gap_df = results_df[['label_profession', 'frac_female']+tpr_gender_gap_cols+tnr_gender_gap_cols]\n", "#gender_gap_df.columns = ['label_profession', 'frac_female']+['{}'.format(_model) for _model in MODEL_NAMES.values()]" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
label_professionfrac_femaledebiased_biosbias_tpr_gender_gapvery_scrubbed_tpr_gender_gapdebiased_tolga_tpr_gender_gapstrong_debiased_1_tpr_gender_gapstrong_no_projection_tpr_gender_gapstrong_debiased_2_tpr_gender_gapstrong_no_equalize_tpr_gender_gapglove_tpr_gender_gap...very_scrubbed_tnr_gender_gapdebiased_tolga_tnr_gender_gapstrong_debiased_1_tnr_gender_gapstrong_no_projection_tnr_gender_gapstrong_debiased_2_tnr_gender_gapstrong_no_equalize_tnr_gender_gapglove_tnr_gender_gapstrong_debiased_4_tnr_gender_gapstrong_debiased_3_tnr_gender_gapscrubbed_tnr_gender_gap
7dietitian0.9204370.2909270.1738780.2977070.1999000.2238620.1870720.2509800.232835...-0.000674-0.002692-0.001220-0.001617-0.001530-0.002213-0.002810-0.001630-0.001412-0.000266
13nurse0.9145020.0827350.0137420.0853770.0487400.0332710.0259810.0574040.082411...-0.001686-0.007627-0.007427-0.004478-0.004807-0.004455-0.005866-0.002840-0.002707-0.001573
15paralegal0.8661090.3757550.0946560.3174820.2620770.2569440.2714370.3149150.271437...0.000247-0.0001030.000095-0.0000120.000075-0.000219-0.000164-0.000060-0.0002360.000195
27yoga_teacher0.8586960.2765340.0055180.1437840.2080490.1161960.1950670.1616360.208374...0.000535-0.001455-0.001289-0.000758-0.001393-0.001211-0.001211-0.001081-0.0013990.000299
12model0.8189880.4806520.1761200.5443090.4184560.4602110.4558240.5325510.505093...-0.001022-0.0005660.0003790.000429-0.000039-0.000513-0.0010080.0002490.0001810.001161
10interior_designer0.7826090.182716-0.0135800.2432100.0814810.0962960.0419750.2246910.270370...0.000226-0.0008840.000032-0.0000230.000024-0.000676-0.0002010.000216-0.0000130.000407
22psychologist0.6207510.0007990.0088900.0458760.0435240.0451690.0202190.0420560.017593...-0.000742-0.005913-0.004535-0.002672-0.002096-0.004275-0.002278-0.003760-0.002820-0.001450
26teacher0.6043820.1112210.0253520.1292990.1117600.1137560.1142460.1191680.137121...-0.000813-0.004694-0.002497-0.004570-0.001141-0.002609-0.002664-0.002461-0.0017850.000671
11journalist0.4921520.0198650.0101820.0575540.0219200.0017900.0130700.0429230.058686...0.000762-0.0001670.0012860.0015140.0019550.0006510.0000140.0016170.001571-0.002623
19physician0.4912030.0198450.0368500.0569890.0351200.0425540.0407190.0348960.024797...0.0014130.0057900.0061250.0063850.0069680.0047610.0075370.0018440.0041260.000307
20poet0.483051-0.0441630.009395-0.0071900.0122070.006903-0.0067110.0163930.001949...0.000827-0.000845-0.001125-0.000642-0.000209-0.000453-0.000933-0.000733-0.000761-0.000698
17personal_trainer0.468293-0.080944-0.011850-0.0680430.032397-0.028670-0.037557-0.091361-0.049694...-0.000783-0.000399-0.001005-0.000138-0.000473-0.000470-0.000456-0.000816-0.0007370.000032
21professor0.452428-0.0181190.011301-0.011141-0.015243-0.0123840.002382-0.004640-0.002251...-0.0016400.0012590.0013490.004071-0.003034-0.004298-0.003673-0.000717-0.003564-0.001995
14painter0.4523610.0031610.0360120.017337-0.0355380.0129590.006991-0.001613-0.002095...-0.000951-0.000336-0.000125-0.0001970.0001730.000315-0.000022-0.0002230.0000500.000144
0accountant0.374554-0.055930-0.031311-0.043805-0.0253120.000459-0.015143-0.044432-0.060287...0.0013410.0000900.0003900.0006940.0004830.0006830.0007570.0003440.000355-0.000109
2attorney0.367104-0.035824-0.003903-0.0072700.0072540.0139280.004176-0.010897-0.004719...0.003337-0.000622-0.001509-0.001953-0.001427-0.001875-0.002338-0.002469-0.001728-0.000642
18photographer0.356927-0.052775-0.011488-0.036094-0.004054-0.017355-0.004763-0.035910-0.031379...0.0015680.000615-0.000121-0.000051-0.0000250.0015420.001537-0.0000920.000170-0.000297
6dentist0.3458240.009651-0.0407380.0031240.0131020.0141660.0081040.0172420.015563...0.0009450.0007380.0005320.0005740.0005630.0004090.0008010.0005160.0003590.000283
9filmmaker0.322148-0.005893-0.023485-0.0173560.0386900.0327970.018358-0.019507-0.001827...0.0001270.0020680.0012360.0015350.0016410.0018470.0020940.0012040.0008140.002261
3chiropractor0.298824-0.025604-0.004360-0.073746-0.0231460.024071-0.021350-0.011547-0.028457...-0.0001520.0009210.0003450.0001750.0006170.0004190.0001270.0000960.0003090.000236
16pastor0.229282-0.274172-0.069785-0.259533-0.096731-0.127909-0.156583-0.218206-0.166127...-0.0000730.0010510.0007410.0006020.0004530.0011370.0012930.0003330.0003330.000199
1architect0.2253990.0535510.0030690.0032080.1067690.1108080.073486-0.0055930.049996...-0.0011150.0043530.0010760.0010650.0028150.0059410.0029350.0024230.003652-0.001770
4comedian0.219457-0.225967-0.081757-0.156671-0.065501-0.076109-0.087733-0.118004-0.124757...0.0002340.000593-0.0000300.000366-0.0001330.0006330.0007530.000327-0.0000550.000581
24software_engineer0.157746-0.0654560.023591-0.056956-0.042324-0.060300-0.0212020.015468-0.036829...0.0012090.0051000.0092600.0071320.0062660.0035120.0064740.0048850.004344-0.000074
25surgeon0.153592-0.229816-0.051839-0.245461-0.122859-0.127233-0.089205-0.220015-0.207968...0.0024350.0058880.0046380.0024880.0023200.0040590.0050130.0034320.0033160.000101
5composer0.153186-0.0687120.036272-0.048370-0.001737-0.050061-0.008452-0.064452-0.063849...0.0011540.0018020.0014630.0015430.0013890.0016760.0015670.0010110.0012450.002464
8dj0.145299-0.1038240.099118-0.1450000.027647-0.0838240.000882-0.178824-0.040588...0.0002440.0011330.0002570.0002110.0007210.0005640.0002060.0008260.000299-0.000025
23rapper0.085859-0.1387720.047449-0.0961980.0172250.0302240.1754960.019175-0.012350...0.0005770.0001550.0006310.0004910.0002680.0008340.0012000.0006450.000180-0.000048
\n", "

28 rows × 24 columns

\n", "
" ], "text/plain": [ " label_profession frac_female debiased_biosbias_tpr_gender_gap \\\n", "7 dietitian 0.920437 0.290927 \n", "13 nurse 0.914502 0.082735 \n", "15 paralegal 0.866109 0.375755 \n", "27 yoga_teacher 0.858696 0.276534 \n", "12 model 0.818988 0.480652 \n", "10 interior_designer 0.782609 0.182716 \n", "22 psychologist 0.620751 0.000799 \n", "26 teacher 0.604382 0.111221 \n", "11 journalist 0.492152 0.019865 \n", "19 physician 0.491203 0.019845 \n", "20 poet 0.483051 -0.044163 \n", "17 personal_trainer 0.468293 -0.080944 \n", "21 professor 0.452428 -0.018119 \n", "14 painter 0.452361 0.003161 \n", "0 accountant 0.374554 -0.055930 \n", "2 attorney 0.367104 -0.035824 \n", "18 photographer 0.356927 -0.052775 \n", "6 dentist 0.345824 0.009651 \n", "9 filmmaker 0.322148 -0.005893 \n", "3 chiropractor 0.298824 -0.025604 \n", "16 pastor 0.229282 -0.274172 \n", "1 architect 0.225399 0.053551 \n", "4 comedian 0.219457 -0.225967 \n", "24 software_engineer 0.157746 -0.065456 \n", "25 surgeon 0.153592 -0.229816 \n", "5 composer 0.153186 -0.068712 \n", "8 dj 0.145299 -0.103824 \n", "23 rapper 0.085859 -0.138772 \n", "\n", " very_scrubbed_tpr_gender_gap debiased_tolga_tpr_gender_gap \\\n", "7 0.173878 0.297707 \n", "13 0.013742 0.085377 \n", "15 0.094656 0.317482 \n", "27 0.005518 0.143784 \n", "12 0.176120 0.544309 \n", "10 -0.013580 0.243210 \n", "22 0.008890 0.045876 \n", "26 0.025352 0.129299 \n", "11 0.010182 0.057554 \n", "19 0.036850 0.056989 \n", "20 0.009395 -0.007190 \n", "17 -0.011850 -0.068043 \n", "21 0.011301 -0.011141 \n", "14 0.036012 0.017337 \n", "0 -0.031311 -0.043805 \n", "2 -0.003903 -0.007270 \n", "18 -0.011488 -0.036094 \n", "6 -0.040738 0.003124 \n", "9 -0.023485 -0.017356 \n", "3 -0.004360 -0.073746 \n", "16 -0.069785 -0.259533 \n", "1 0.003069 0.003208 \n", "4 -0.081757 -0.156671 \n", "24 0.023591 -0.056956 \n", "25 -0.051839 -0.245461 \n", "5 0.036272 -0.048370 \n", "8 0.099118 -0.145000 \n", "23 0.047449 -0.096198 \n", "\n", " 
strong_debiased_1_tpr_gender_gap strong_no_projection_tpr_gender_gap \\\n", "7 0.199900 0.223862 \n", "13 0.048740 0.033271 \n", "15 0.262077 0.256944 \n", "27 0.208049 0.116196 \n", "12 0.418456 0.460211 \n", "10 0.081481 0.096296 \n", "22 0.043524 0.045169 \n", "26 0.111760 0.113756 \n", "11 0.021920 0.001790 \n", "19 0.035120 0.042554 \n", "20 0.012207 0.006903 \n", "17 0.032397 -0.028670 \n", "21 -0.015243 -0.012384 \n", "14 -0.035538 0.012959 \n", "0 -0.025312 0.000459 \n", "2 0.007254 0.013928 \n", "18 -0.004054 -0.017355 \n", "6 0.013102 0.014166 \n", "9 0.038690 0.032797 \n", "3 -0.023146 0.024071 \n", "16 -0.096731 -0.127909 \n", "1 0.106769 0.110808 \n", "4 -0.065501 -0.076109 \n", "24 -0.042324 -0.060300 \n", "25 -0.122859 -0.127233 \n", "5 -0.001737 -0.050061 \n", "8 0.027647 -0.083824 \n", "23 0.017225 0.030224 \n", "\n", " strong_debiased_2_tpr_gender_gap strong_no_equalize_tpr_gender_gap \\\n", "7 0.187072 0.250980 \n", "13 0.025981 0.057404 \n", "15 0.271437 0.314915 \n", "27 0.195067 0.161636 \n", "12 0.455824 0.532551 \n", "10 0.041975 0.224691 \n", "22 0.020219 0.042056 \n", "26 0.114246 0.119168 \n", "11 0.013070 0.042923 \n", "19 0.040719 0.034896 \n", "20 -0.006711 0.016393 \n", "17 -0.037557 -0.091361 \n", "21 0.002382 -0.004640 \n", "14 0.006991 -0.001613 \n", "0 -0.015143 -0.044432 \n", "2 0.004176 -0.010897 \n", "18 -0.004763 -0.035910 \n", "6 0.008104 0.017242 \n", "9 0.018358 -0.019507 \n", "3 -0.021350 -0.011547 \n", "16 -0.156583 -0.218206 \n", "1 0.073486 -0.005593 \n", "4 -0.087733 -0.118004 \n", "24 -0.021202 0.015468 \n", "25 -0.089205 -0.220015 \n", "5 -0.008452 -0.064452 \n", "8 0.000882 -0.178824 \n", "23 0.175496 0.019175 \n", "\n", " glove_tpr_gender_gap ... \\\n", "7 0.232835 ... \n", "13 0.082411 ... \n", "15 0.271437 ... \n", "27 0.208374 ... \n", "12 0.505093 ... \n", "10 0.270370 ... \n", "22 0.017593 ... \n", "26 0.137121 ... \n", "11 0.058686 ... \n", "19 0.024797 ... \n", "20 0.001949 ... \n", "17 -0.049694 ... 
\n", "21 -0.002251 ... \n", "14 -0.002095 ... \n", "0 -0.060287 ... \n", "2 -0.004719 ... \n", "18 -0.031379 ... \n", "6 0.015563 ... \n", "9 -0.001827 ... \n", "3 -0.028457 ... \n", "16 -0.166127 ... \n", "1 0.049996 ... \n", "4 -0.124757 ... \n", "24 -0.036829 ... \n", "25 -0.207968 ... \n", "5 -0.063849 ... \n", "8 -0.040588 ... \n", "23 -0.012350 ... \n", "\n", " very_scrubbed_tnr_gender_gap debiased_tolga_tnr_gender_gap \\\n", "7 -0.000674 -0.002692 \n", "13 -0.001686 -0.007627 \n", "15 0.000247 -0.000103 \n", "27 0.000535 -0.001455 \n", "12 -0.001022 -0.000566 \n", "10 0.000226 -0.000884 \n", "22 -0.000742 -0.005913 \n", "26 -0.000813 -0.004694 \n", "11 0.000762 -0.000167 \n", "19 0.001413 0.005790 \n", "20 0.000827 -0.000845 \n", "17 -0.000783 -0.000399 \n", "21 -0.001640 0.001259 \n", "14 -0.000951 -0.000336 \n", "0 0.001341 0.000090 \n", "2 0.003337 -0.000622 \n", "18 0.001568 0.000615 \n", "6 0.000945 0.000738 \n", "9 0.000127 0.002068 \n", "3 -0.000152 0.000921 \n", "16 -0.000073 0.001051 \n", "1 -0.001115 0.004353 \n", "4 0.000234 0.000593 \n", "24 0.001209 0.005100 \n", "25 0.002435 0.005888 \n", "5 0.001154 0.001802 \n", "8 0.000244 0.001133 \n", "23 0.000577 0.000155 \n", "\n", " strong_debiased_1_tnr_gender_gap strong_no_projection_tnr_gender_gap \\\n", "7 -0.001220 -0.001617 \n", "13 -0.007427 -0.004478 \n", "15 0.000095 -0.000012 \n", "27 -0.001289 -0.000758 \n", "12 0.000379 0.000429 \n", "10 0.000032 -0.000023 \n", "22 -0.004535 -0.002672 \n", "26 -0.002497 -0.004570 \n", "11 0.001286 0.001514 \n", "19 0.006125 0.006385 \n", "20 -0.001125 -0.000642 \n", "17 -0.001005 -0.000138 \n", "21 0.001349 0.004071 \n", "14 -0.000125 -0.000197 \n", "0 0.000390 0.000694 \n", "2 -0.001509 -0.001953 \n", "18 -0.000121 -0.000051 \n", "6 0.000532 0.000574 \n", "9 0.001236 0.001535 \n", "3 0.000345 0.000175 \n", "16 0.000741 0.000602 \n", "1 0.001076 0.001065 \n", "4 -0.000030 0.000366 \n", "24 0.009260 0.007132 \n", "25 0.004638 0.002488 \n", "5 0.001463 
0.001543 \n", "8 0.000257 0.000211 \n", "23 0.000631 0.000491 \n", "\n", " strong_debiased_2_tnr_gender_gap strong_no_equalize_tnr_gender_gap \\\n", "7 -0.001530 -0.002213 \n", "13 -0.004807 -0.004455 \n", "15 0.000075 -0.000219 \n", "27 -0.001393 -0.001211 \n", "12 -0.000039 -0.000513 \n", "10 0.000024 -0.000676 \n", "22 -0.002096 -0.004275 \n", "26 -0.001141 -0.002609 \n", "11 0.001955 0.000651 \n", "19 0.006968 0.004761 \n", "20 -0.000209 -0.000453 \n", "17 -0.000473 -0.000470 \n", "21 -0.003034 -0.004298 \n", "14 0.000173 0.000315 \n", "0 0.000483 0.000683 \n", "2 -0.001427 -0.001875 \n", "18 -0.000025 0.001542 \n", "6 0.000563 0.000409 \n", "9 0.001641 0.001847 \n", "3 0.000617 0.000419 \n", "16 0.000453 0.001137 \n", "1 0.002815 0.005941 \n", "4 -0.000133 0.000633 \n", "24 0.006266 0.003512 \n", "25 0.002320 0.004059 \n", "5 0.001389 0.001676 \n", "8 0.000721 0.000564 \n", "23 0.000268 0.000834 \n", "\n", " glove_tnr_gender_gap strong_debiased_4_tnr_gender_gap \\\n", "7 -0.002810 -0.001630 \n", "13 -0.005866 -0.002840 \n", "15 -0.000164 -0.000060 \n", "27 -0.001211 -0.001081 \n", "12 -0.001008 0.000249 \n", "10 -0.000201 0.000216 \n", "22 -0.002278 -0.003760 \n", "26 -0.002664 -0.002461 \n", "11 0.000014 0.001617 \n", "19 0.007537 0.001844 \n", "20 -0.000933 -0.000733 \n", "17 -0.000456 -0.000816 \n", "21 -0.003673 -0.000717 \n", "14 -0.000022 -0.000223 \n", "0 0.000757 0.000344 \n", "2 -0.002338 -0.002469 \n", "18 0.001537 -0.000092 \n", "6 0.000801 0.000516 \n", "9 0.002094 0.001204 \n", "3 0.000127 0.000096 \n", "16 0.001293 0.000333 \n", "1 0.002935 0.002423 \n", "4 0.000753 0.000327 \n", "24 0.006474 0.004885 \n", "25 0.005013 0.003432 \n", "5 0.001567 0.001011 \n", "8 0.000206 0.000826 \n", "23 0.001200 0.000645 \n", "\n", " strong_debiased_3_tnr_gender_gap scrubbed_tnr_gender_gap \n", "7 -0.001412 -0.000266 \n", "13 -0.002707 -0.001573 \n", "15 -0.000236 0.000195 \n", "27 -0.001399 0.000299 \n", "12 0.000181 0.001161 \n", "10 -0.000013 0.000407 \n", 
"22 -0.002820 -0.001450 \n", "26 -0.001785 0.000671 \n", "11 0.001571 -0.002623 \n", "19 0.004126 0.000307 \n", "20 -0.000761 -0.000698 \n", "17 -0.000737 0.000032 \n", "21 -0.003564 -0.001995 \n", "14 0.000050 0.000144 \n", "0 0.000355 -0.000109 \n", "2 -0.001728 -0.000642 \n", "18 0.000170 -0.000297 \n", "6 0.000359 0.000283 \n", "9 0.000814 0.002261 \n", "3 0.000309 0.000236 \n", "16 0.000333 0.000199 \n", "1 0.003652 -0.001770 \n", "4 -0.000055 0.000581 \n", "24 0.004344 -0.000074 \n", "25 0.003316 0.000101 \n", "5 0.001245 0.002464 \n", "8 0.000299 -0.000025 \n", "23 0.000180 -0.000048 \n", "\n", "[28 rows x 24 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gender_gap_df.sort_values('frac_female', ascending = False)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# Fraction of comments where new model has lower\n", "# TPR gap than the baseline\n", "\n", "def compute_fraction_improved(df, baseline_model, improved_model):\n", " is_improved = np.abs(df[baseline_model]) >= np.abs(df[improved_model])\n", " return np.mean(is_improved)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "debiased_biosbias\n", "0.32142857142857145\n", "very_scrubbed\n", "0.7142857142857143\n", "debiased_tolga\n", "0.2857142857142857\n", "strong_debiased_1\n", "0.6428571428571429\n", "strong_no_projection\n", "0.6071428571428571\n", "strong_debiased_2\n", "0.7142857142857143\n", "strong_no_equalize\n", "0.39285714285714285\n", "glove\n", "1.0\n", "strong_debiased_4\n", "0.6071428571428571\n", "strong_debiased_3\n", "0.6071428571428571\n", "scrubbed\n", "0.8571428571428571\n" ] } ], "source": [ "for _model in MODEL_NAMES.values():\n", " print(_model)\n", " print(compute_fraction_improved(gender_gap_df, 'glove_tpr_gender_gap', '{}_tpr_gender_gap'.format(_model)))" ] }, { "cell_type": "code", 
"execution_count": 33, "metadata": {}, "outputs": [], "source": [ "tpr_cols = ['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\n", "tnr_cols = ['{}_tnr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\n", "gender_gap_cols = tpr_cols + tnr_cols" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "debiased_biosbias_tpr_gender_gap 0.029446\n", "very_scrubbed_tpr_gender_gap 0.003786\n", "debiased_tolga_tpr_gender_gap 0.028584\n", "strong_debiased_1_tpr_gender_gap 0.014313\n", "strong_no_projection_tpr_gender_gap 0.015602\n", "strong_debiased_2_tpr_gender_gap 0.016134\n", "strong_no_equalize_tpr_gender_gap 0.025152\n", "glove_tpr_gender_gap 0.022636\n", "strong_debiased_4_tpr_gender_gap 0.016461\n", "strong_debiased_3_tpr_gender_gap 0.014632\n", "scrubbed_tpr_gender_gap 0.000189\n", "debiased_biosbias_tnr_gender_gap 0.000011\n", "very_scrubbed_tnr_gender_gap 0.000001\n", "debiased_tolga_tnr_gender_gap 0.000009\n", "strong_debiased_1_tnr_gender_gap 0.000009\n", "strong_no_projection_tnr_gender_gap 0.000006\n", "strong_debiased_2_tnr_gender_gap 0.000006\n", "strong_no_equalize_tnr_gender_gap 0.000006\n", "glove_tnr_gender_gap 0.000008\n", "strong_debiased_4_tnr_gender_gap 0.000003\n", "strong_debiased_3_tnr_gender_gap 0.000004\n", "scrubbed_tnr_gender_gap 0.000001\n", "dtype: float64" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gender_gap_df[gender_gap_cols].apply(lambda x: np.mean(x**2))" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "debiased_biosbias_tpr_gender_gap 0.119049\n", "very_scrubbed_tpr_gender_gap 0.041268\n", "debiased_tolga_tpr_gender_gap 0.114932\n", "strong_debiased_1_tpr_gender_gap 0.075670\n", "strong_no_projection_tpr_gender_gap 0.079293\n", "strong_debiased_2_tpr_gender_gap 0.075149\n", "strong_no_equalize_tpr_gender_gap 0.102661\n", 
"glove_tpr_gender_gap 0.096764\n", "strong_debiased_4_tpr_gender_gap 0.083171\n", "strong_debiased_3_tpr_gender_gap 0.070882\n", "scrubbed_tpr_gender_gap 0.007773\n", "debiased_biosbias_tnr_gender_gap 0.002204\n", "very_scrubbed_tnr_gender_gap 0.000958\n", "debiased_tolga_tnr_gender_gap 0.002066\n", "strong_debiased_1_tnr_gender_gap 0.001811\n", "strong_no_projection_tnr_gender_gap 0.001657\n", "strong_debiased_2_tnr_gender_gap 0.001537\n", "strong_no_equalize_tnr_gender_gap 0.001866\n", "glove_tnr_gender_gap 0.001997\n", "strong_debiased_4_tnr_gender_gap 0.001316\n", "strong_debiased_3_tnr_gender_gap 0.001376\n", "scrubbed_tnr_gender_gap 0.000747\n", "dtype: float64" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gender_gap_df[gender_gap_cols].apply(lambda x: np.mean(np.abs(x)))" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "def plot_tpr_gap(df, _model):\n", " fig, ax = plt.subplots(figsize=(15, 6))\n", " x = 'frac_female'\n", " y = '{}_tpr_gender_gap'.format(_model)\n", " p1 = sns.regplot(x = x, y = y, data = df)\n", " p1.set(xlabel = \"% Female\", ylabel = \"TPR Gender Gap\", title = _model)\n", "\n", " for line in range(0,df.shape[0]):\n", " p1.text(results_df[x][line]+0.01, df[y][line], df['label_profession'][line], horizontalalignment='left', size='medium', color='black')\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "for _model in MODEL_NAMES.values():\n", " if 'untuned' in _model:\n", " plot_tpr_gap(results_df, _model)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
frac_female
frac_female1.000000
debiased_biosbias_tpr_gender_gap0.829982
very_scrubbed_tpr_gender_gap0.458378
debiased_tolga_tpr_gender_gap0.824882
strong_debiased_1_tpr_gender_gap0.716922
strong_no_projection_tpr_gender_gap0.709000
strong_debiased_2_tpr_gender_gap0.596896
strong_no_equalize_tpr_gender_gap0.772645
glove_tpr_gender_gap0.794059
strong_debiased_4_tpr_gender_gap0.550435
strong_debiased_3_tpr_gender_gap0.707174
scrubbed_tpr_gender_gap-0.282919
\n", "
" ], "text/plain": [ " frac_female\n", "frac_female 1.000000\n", "debiased_biosbias_tpr_gender_gap 0.829982\n", "very_scrubbed_tpr_gender_gap 0.458378\n", "debiased_tolga_tpr_gender_gap 0.824882\n", "strong_debiased_1_tpr_gender_gap 0.716922\n", "strong_no_projection_tpr_gender_gap 0.709000\n", "strong_debiased_2_tpr_gender_gap 0.596896\n", "strong_no_equalize_tpr_gender_gap 0.772645\n", "glove_tpr_gender_gap 0.794059\n", "strong_debiased_4_tpr_gender_gap 0.550435\n", "strong_debiased_3_tpr_gender_gap 0.707174\n", "scrubbed_tpr_gender_gap -0.282919" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results_df[['frac_female']+['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]].corr()[['frac_female']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Gender Prediction Analysis" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "# Which model does this correspond to?\n", "model_name = 'tf_gru_attention_multiclass_gender_biosbias_glove:v_20190405_142640'\n", "gender_df['correct'] = ((gender_df['gender'] == 'M') == gender_df[model_name])" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.8423\n" ] } ], "source": [ "acc = gender_df.correct.sum()/gender_df.correct.count()\n", "print('Accuracy: {:.4f}'.format(acc))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "models_eval_py2", "language": "python", "name": "models_eval_py2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: 
model_evaluation/Predict bias.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "from tensorflow.contrib.framework.python.framework import checkpoint_utils\n", "\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "from sklearn.preprocessing import normalize\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "MODEL_DIR_OCCUPATION = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103117/model_dir/model.ckpt-100000'\n", "MODEL_DIR_GENDER = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_warmstart_biosbias_glove/20190404_151521/model_dir/model.ckpt-191000'\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Extract two matrices." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('dense/bias', [1])\n", "('dense/bias/Adam', [1])\n", "('dense/bias/Adam_1', [1])\n", "('dense/kernel', [256, 1])\n", "('dense/kernel/Adam', [256, 1])\n", "('dense/kernel/Adam_1', [256, 1])\n", "('dense_1/bias', [128])\n", "('dense_1/bias/Adam', [128])\n", "('dense_1/bias/Adam_1', [128])\n", "('dense_1/kernel', [256, 128])\n", "('dense_1/kernel/Adam', [256, 128])\n", "('dense_1/kernel/Adam_1', [256, 128])\n", "('dense_2/bias', [33])\n", "('dense_2/bias/Adam', [33])\n", "('dense_2/bias/Adam_1', [33])\n", "('dense_2/kernel', [128, 33])\n", "('dense_2/kernel/Adam', [128, 33])\n", "('dense_2/kernel/Adam_1', [128, 33])\n", "('embeddings', [400002, 100])\n", "('global_step', [])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam', [256])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam_1', [256])\n", 
"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam', [356, 256])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam_1', [356, 256])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam', [512])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam_1', [512])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam', [356, 512])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam_1', [356, 512])\n", "('signal_early_stopping/STOP', [])\n", "('title/beta1_power', [])\n", "('title/beta2_power', [])\n" ] } ], "source": [ "var_list = checkpoint_utils.list_variables(MODEL_DIR_OCCUPATION)\n", "for v in var_list:\n", " print(v)" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "kernel_occupation = np.transpose(checkpoint_utils.load_variable(MODEL_DIR_OCCUPATION, 'dense_2/kernel'))" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('beta1_power', [])\n", "('beta2_power', [])\n", "('dense/bias', [1])\n", "('dense/kernel', [256, 1])\n", "('dense_1/bias', [128])\n", "('dense_1/kernel', [256, 128])\n", "('embeddings', [400002, 100])\n", "('final_layer/bias', [33])\n", "('final_layer/bias/Adam', [33])\n", "('final_layer/bias/Adam_1', [33])\n", "('final_layer/kernel', [128, 33])\n", "('final_layer/kernel/Adam', [128, 33])\n", "('final_layer/kernel/Adam_1', [128, 33])\n", "('global_step', [])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])\n", "('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])\n", "('signal_early_stopping/STOP', [])\n" ] } ], 
"source": [ "var_list = checkpoint_utils.list_variables(MODEL_DIR_GENDER)\n", "for v in var_list:\n", " print(v)" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [], "source": [ "kernel_gender = np.transpose(checkpoint_utils.load_variable(MODEL_DIR_GENDER, 'final_layer/kernel'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Compute cosine." ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [], "source": [ "TITLE_LABELS = [\n", " 'accountant', 'acupuncturist', 'architect', 'attorney', 'chiropractor', 'comedian', 'composer', 'dentist',\n", " 'dietitian', 'dj', 'filmmaker', 'interior_designer', 'journalist', 'landscape_architect', 'magician',\n", " 'massage_therapist', 'model', 'nurse', 'painter', 'paralegal', 'pastor', 'personal_trainer',\n", " 'photographer', 'physician', 'poet', 'professor', 'psychologist', 'rapper',\n", " 'real_estate_broker', 'software_engineer', 'surgeon', 'teacher', 'yoga_teacher']" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(33, 128)" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "kernel_gender.shape" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "kernel_gender_female = normalize(kernel_gender[0].reshape(1, -1))\n", "kernel_gender_male = normalize(kernel_gender[1].reshape(1, -1))" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0.01618018, 0.1003583 , -0.0723118 , 0.06453013, 0.22758739,\n", " 0.06790616, 0.08027411, 0.10015733, -0.05590729, 0.023418 ,\n", " 0.06255525, -0.02604564, 0.09049062, -0.01601316, 0.08945937,\n", " -0.11582728, 0.06244883, 0.07855629, 0.01956639, -0.06774757,\n", " 0.00614625, -0.03594974, 0.0652191 , -0.05078628, -0.00807877,\n", " 0.06896302, 0.11013658, -0.04664179, 0.11593511, 0.17774113,\n", " 0.09496382, 
0.12176205, 0.04098931, -0.0970282 , 0.02898299,\n", " 0.10654851, -0.13562816, 0.03486229, 0.12194955, 0.02276845,\n", " 0.04589143, -0.06606348, -0.00129113, -0.07973252, -0.02630814,\n", " -0.09769032, -0.1640446 , -0.07602697, 0.00429134, 0.06098389,\n", " 0.02934178, -0.07209212, -0.11304612, 0.29547158, -0.04287611,\n", " -0.04518875, -0.02993831, 0.06304532, 0.07989506, -0.09601919,\n", " 0.20816126, -0.1977993 , 0.15119584, 0.01456547, 0.06435941,\n", " -0.07794361, -0.00554093, 0.05497926, 0.0931736 , 0.22706528,\n", " -0.08019326, -0.0819607 , 0.04490028, -0.01723337, 0.04124108,\n", " 0.13199665, -0.01417105, 0.0725795 , -0.05172402, -0.13563272,\n", " -0.07302421, 0.24843292, 0.14667384, -0.02692026, 0.15892392,\n", " 0.02655477, -0.00804625, 0.00184608, 0.02203059, 0.00078905,\n", " 0.0115315 , 0.00199543, 0.05942026, 0.07089076, -0.04697848,\n", " -0.01500242, -0.02432874, -0.02453819, -0.13443194, -0.00370577,\n", " -0.03219581, -0.07874984, -0.05446392, 0.05492223, -0.11461313,\n", " -0.00379655, 0.01339969, -0.01030909, 0.0601744 , 0.00417376,\n", " -0.02308951, -0.1329045 , -0.00130105, 0.0959954 , 0.03397062,\n", " 0.11269465, 0.00561908, 0.00870924, 0.0339431 , 0.01517005,\n", " -0.05439634, -0.02544309, -0.13284749, 0.04113958, 0.03033615,\n", " -0.08890872, -0.09986325, -0.09274729]], dtype=float32)" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "kernel_gender_male" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "kernel_gender_mean = normalize((kernel_gender_female + kernel_gender_male) / 2)\n", "direction_male = kernel_gender_male - np.sum(np.multiply(kernel_gender_male, kernel_gender_mean))* kernel_gender_mean\n", "direction_female = kernel_gender_female - np.sum(np.multiply(kernel_gender_female, kernel_gender_mean))* kernel_gender_mean" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"array([[-1.]], dtype=float32)" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cosine_similarity(direction_female, direction_male)" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "# Compute mean\n", "kernel_occupation_mean = np.mean(kernel_occupation, axis=0)\n", "# Apply x - np.sum(np.multiply(x, mean))* mean" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accountant:-0.166758477688\n", "acupuncturist:0.0150433778763\n", "architect:-0.106728702784\n", "attorney:-0.0355984941125\n", "chiropractor:-0.112065583467\n", "comedian:-0.17996160686\n", "composer:-0.154989466071\n", "dentist:-0.00389941781759\n", "dietitian:0.00302037596703\n", "dj:-0.156128510833\n", "filmmaker:-0.116180986166\n", "interior_designer:-0.00478803366423\n", "journalist:-0.0217301938683\n", "landscape_architect:-0.00763043016195\n", "magician:-0.00733107328415\n", "massage_therapist:-0.0116159021854\n", "model:0.0549785941839\n", "nurse:0.099561393261\n", "painter:0.0174702480435\n", "paralegal:0.0106164813042\n", "pastor:-0.161623597145\n", "personal_trainer:-0.133440434933\n", "photographer:-0.0985902026296\n", "physician:-0.00131351128221\n", "poet:-0.061441861093\n", "professor:0.00782079994678\n", "psychologist:0.00208866596222\n", "rapper:-0.112389668822\n", "real_estate_broker:-0.000683411955833\n", "software_engineer:-0.0237298682332\n", "surgeon:-0.0968104675412\n", "teacher:-0.0625882595778\n", "yoga_teacher:0.0292760580778\n" ] } ], "source": [ "for i in range(33):\n", "# _bias = np.abs(cosine_similarity(kernel_gender_female, kernel_occupation[i].reshape(1, -1))) + \\\n", "# np.abs(cosine_similarity(kernel_gender_male, kernel_occupation[i].reshape(1, -1)))\n", "\n", " _bias = (cosine_similarity(kernel_gender_female - kernel_gender_male, kernel_occupation[i].reshape(1, -1)))\n", "\n", "# _bias = 
cosine_similarity(direction_male, kernel_occupation[i].reshape(1, -1))\n", "\n", "# x = kernel_occupation[i].reshape(1, -1)\n", "# x = x - np.sum(np.multiply(x, kernel_occupation_mean))* kernel_occupation_mean\n", "# _bias = cosine_similarity(direction_male, x)\n", "\n", " print ('{}:{}'.format(TITLE_LABELS[i], float(_bias)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.14+" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: model_evaluation/README.md ================================================ # Evaluation Pipeline for Text classification models. This directory contains utilities to use a model deployed on cloud MLE (in 'utils_export/'), and some notebooks to illustrate a typical evaluation pipeline. ## Environment Setup ### Python Dependencies Install library dependencies (it is optional, but recommended, to install these in a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html)): ```shell # The python2 way to create and use virtual environment # (optional, but recommended): virtualenv .pyenv source .pyenv/bin/activate # Install dependencies pip install -r requirements.txt jupyter notebook # ... do stuff ... # Exit your virtual environment. deactivate ``` ### Google Cloud Storage dependencies If you need to access data located in Google Cloud Storage, you must [install the Google Cloud SDK](https://cloud.google.com/sdk/docs/) and initialize it within your virtual environment.
## Evaluating one model The notebook `jigsaw_evaluation_pipeline.ipynb` provides an example of running evaluation metrics for the ml-fairness project. We use the `Dataset` and `Model` utilities from `utils_export/` to interact with the models deployed on CMLE and execute the following steps: * Load two datasets: 1 dataset to evaluate performance (or intended bias) similar to the training data, and 1 dataset to evaluate the unintended bias that includes identity information. * Run the model on each dataset and collect the predictions. * Compute evaluation metrics: AUC on the first dataset, pinned_auc on the second one. ## Evaluating several models This is useful to compare different training runs (with different parameters) but also to compare the evaluation metrics during the training run (several models exported during 1 training run). TODO(fprost): Write description once the notebook is pushed ## Cloud MLE utilities The utility library `utils_export/` intends to simplify the use of CMLE deployed models. ### Typical usage pattern This library will handle the following "overhead" tasks: * Convert your pandas `DataFrame` into tf-records, adding an `example_key` to each example. * Send an HTTP request to CMLE to run a batch prediction job. * Wait for job completion. * Parse prediction files and join results with the initial `DataFrame` based on `example_key`. ```python input_fn = ... (returns pandas DataFrame). dataset = Dataset(input_fn, dataset_dir) dataset.load_data(10000) model = Model(...) dataset.add_model_prediction_to_data(model) OR dataset.add_model_prediction_to_data(model, recompute_predictions=False) dataset.show_data() ``` ### `Model` A `Model` instance describes the key components of a CMLE model. Key parameters are: * how to access the model: project_name, model_names. * what the expected inputs to the models are and their respective types (see EncodingFeatureSpec). The types are important to find the right encoding function for TF-records.
* what the model outputs are. Example: ```python model = Model( feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING}, prediction_keys='prediction_key', model_names=['model_name1:version1', 'model_name1:version2', 'model_name2:version1'], project_name='wikidetox') ``` ### `Dataset` A `Dataset` instance is related to a pandas `DataFrame` and will be progressively augmented with the model predictions. The dataset attributes are: * `input_fn`: a function that returns a `DataFrame` (input_data). * `DATASET_DIR`: where to save/load all the files associated with the `Dataset`, in particular input_tf_records and cloud mle predictions. ================================================ FILE: model_evaluation/deploy_models.sh ================================================ #!/bin/bash MODEL_DIRS='gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103329/model_dir,'\ 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103300/model_dir,'\ 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103254/model_dir,'\ 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103245/model_dir,'\ 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103232/model_dir,'\ 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103209/model_dir,'\ 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103152/model_dir,'\ 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103117/model_dir' python utils_export/deploy_list_models.py --list_model_dir=$MODEL_DIRS --model_name 'tf_test_fprost' ================================================ FILE:
model_evaluation/few_shot_learning_baseline_evaluation.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "\n", "import getpass\n", "from IPython.display import display\n", "import json\n", "import nltk\n", "import numpy as np\n", "import pandas as pd\n", "import pkg_resources\n", "import os\n", "import random\n", "import re\n", "import seaborn as sns\n", "import sklearn.metrics as metrics\n", "\n", "import tensorflow as tf\n", "from tensorflow.python.lib.io import file_io" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from utils_export.dataset import Dataset, Model\n", "from utils_export import utils_cloudml\n", "from utils_export import utils_tfrecords" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Faster to access GCS file:\n", "# https://github.com/tensorflow/tensorflow/issues/15530\n", "os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[nltk_data] Downloading package punkt to /Users/msushkov/nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nltk.download('punkt')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def tokenizer(text, lowercase=True):\n", " \"\"\"Converts text to a list of 
words.\n", "\n", " Args:\n", " text: piece of text to tokenize (string).\n", " lowercase: whether to include lowercasing in preprocessing (bool).\n", "\n", " Returns:\n", " A list of strings (words).\n", " \"\"\"\n", " words = nltk.word_tokenize(text.decode('utf-8'))\n", " if lowercase:\n", " words = [w.lower() for w in words]\n", " return words" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def make_test_input_fn(dataset_path,\n", " model_text_feature,\n", " dataset_text_feature,\n", " data_label,\n", " tokenizer_fn,\n", " label_data_type=tf.float32,\n", " max_n_examples=None,\n", " random_filter_keep_rate=1.0):\n", " \"\"\"Returns a test input function.\n", " \n", " Args:\n", " dataset_path (str): Path to dataset.\n", " model_text_feature (str): The feature column corresponding to the\n", " text input the model expects.\n", " dataset_text_feature (str): The name of the text feature of the dataset.\n", " data_label (str): The output label for the dataset.\n", " tokenizer_fn: Tokenizer function (str -> list).\n", " max_n_examples (int): How many examples to evaluate on.\n", " random_filter_keep_rate (float): Filter out test examples with this probability.\n", "\n", " Returns:\n", " Test input function.\n", " \"\"\"\n", " decoding_input_features = {\n", " dataset_text_feature: tf.FixedLenFeature([], dtype=tf.string),\n", " data_label: tf.FixedLenFeature([], dtype=label_data_type)\n", " }\n", "\n", " def test_input_fn(max_n_examples=max_n_examples,\n", " random_filter_keep_rate=random_filter_keep_rate):\n", " \"\"\"Test input function.\n", " \n", " Args:\n", " max_n_examples (int): How many examples to evaluate on.\n", " random_filter_keep_rate (float): Filter out test examples with this probability.\n", " \n", " Returns:\n", " DataFrame with the results.\n", " \"\"\"\n", " res = utils_tfrecords.decode_tf_records_to_pandas(\n", " decoding_input_features,\n", " dataset_path,\n", " max_n_examples,\n", " 
random_filter_keep_rate)\n", " if not tokenizer_fn:\n", " tok = lambda x: [x]\n", " res[model_text_feature] = list(map(tok, res[dataset_text_feature]))\n", " else:\n", " res[model_text_feature] = list(map(tokenizer_fn, res[dataset_text_feature]))\n", " res = res.rename(columns={ data_label: 'label' })\n", " res['label'] = list(map(lambda x: bool(round(x)), list(res['label'])))\n", " final = res.copy(deep=True)\n", " return final\n", "\n", " return test_input_fn" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def print_results(results_df, model_names, print_pr_curve=False):\n", " \"\"\"Print the classification results.\n", " \n", " Args:\n", " results_df: DataFrame with the results.\n", " model_names: List of strings representing the models for which we have results.\n", " \"\"\"\n", " labels = results_df['label']\n", " for _model in model_names:\n", " print(_model)\n", " model_preds = results_df[_model]\n", " fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)\n", " roc_auc = metrics.auc(fpr, tpr)\n", " recalls, precisions, thr = metrics.precision_recall_curve(labels, model_preds)\n", " pr_auc = metrics.auc(precisions, recalls)\n", " model_preds_binary = (model_preds > 0.5).astype(np.int_)\n", " f1 = metrics.f1_score(labels, model_preds_binary)\n", " print('\\tROC AUC: {}'.format(roc_auc))\n", " print('\\tPR AUC: {}'.format(pr_auc))\n", " print('\\tF1: {}'.format(f1))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "PROJECT_NAME = 'conversationai-models'\n", "SENTENCE_KEY = 'comment_key' #Input key\n", "\n", "# Pattern for path of tf_records\n", "OUTPUT_DIR_BASE = os.path.join(\n", " 'gs://conversationai-models',\n", " getpass.getuser(),\n", " 'tfrecords')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate models on Civil Comments dataset" ] }, { "cell_type": "code", "execution_count": 175, "metadata": {}, "outputs": [], "source": [ 
"LABEL_NAME_PREDICTION_MODEL = 'toxicity/logistic'\n", "DATASET = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/test-*.tfrecord'\n", "DATA_LABEL = 'toxicity'\n", "DATASET_TEXT_FEATURE='comment_text'\n", "\n", "# Pattern for path of tf_records\n", "OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'civil_comments_test')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CNN, GRU Attention Models" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'tokens'\n", "MODEL_NAMES = [\n", " 'tf_cnn_civil_comments_glove:v_20190219_185541',\n", " 'tf_gru_attention_civil_comments_glove:v_20190219_185619',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, tokenizer)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n", "test_dataset.load_data(10000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [], "source": [ "civil_comments_test_df = test_dataset.show_data()" ] }, { "cell_type": "code", "execution_count": 144, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "tf_cnn_civil_comments_glove:v_20190219_185541\n", "\tROC AUC: 0.9573435242534393\n", "\tPR AUC: 0.6729934425219886\n", "tf_gru_attention_civil_comments_glove:v_20190219_185619\n", "\tROC AUC: 0.9649161132104584\n", "\tPR AUC: 0.7486011745102973\n" ] } ], "source": [ "print_results(civil_comments_test_df, MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### TF-Hub Model" ] }, { "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'text'\n", "MODEL_NAMES = [\n", " 'tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, None)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n", "test_dataset.load_data(10000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [], "source": [ "civil_comments_hub_df = test_dataset.show_data()" ] }, { "cell_type": "code", "execution_count": 194, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552\n", "\tROC AUC: 0.9595451744696132\n", "\tPR AUC: 0.7429338592289392\n" ] } ], "source": [ "print_results(civil_comments_hub_df, MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate models on Toxicity dataset" ] }, { "cell_type": "code", "execution_count": 195, "metadata": {}, "outputs": [], "source": [ "LABEL_NAME_PREDICTION_MODEL = 'frac_neg/logistic'\n", "DATASET = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'\n", "DATA_LABEL = 'frac_neg'\n", "DATASET_TEXT_FEATURE='comment_text'\n", "\n", "# Pattern for path of tf_records\n", "OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'toxicity_test')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CNN, GRU Attention Models" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'tokens'\n", "MODEL_NAMES = [\n", " 'tf_cnn_toxicity_glove:v_20190219_185532',\n", " 'tf_gru_attention_toxicity_glove:v_20190219_185516',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, tokenizer)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n", "test_dataset.load_data(10000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time 
if predictions are available.\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 149, "metadata": {}, "outputs": [], "source": [ "toxicity_test_df1 = test_dataset.show_data()" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_cnn_toxicity_glove:v_20190219_185532\n", "\tROC AUC: 0.951760553925346\n", "\tPR AUC: 0.8740274773143215\n", "tf_gru_attention_toxicity_glove:v_20190219_185516\n", "\tROC AUC: 0.9543916575133977\n", "\tPR AUC: 0.8814208812923074\n" ] } ], "source": [ "print_results(toxicity_test_df1, MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### TF-Hub Model" ] }, { "cell_type": "code", "execution_count": 196, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'text'\n", "MODEL_NAMES = [\n", " 'tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, None)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n", "test_dataset.load_data(10000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "test_dataset.add_model_prediction_to_data(model, 
recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 200, "metadata": {}, "outputs": [], "source": [ "toxicity_test_df2 = test_dataset.show_data()" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427\n", "\tROC AUC: 0.9270843170934745\n", "\tPR AUC: 0.8155815559085313\n" ] } ], "source": [ "print_results(toxicity_test_df2, MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate models on Many Communities dataset (full)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "LABEL_NAME_PREDICTION_MODEL = 'removed/logistic'\n", "DATASET = 'gs://conversationai-models/resources/transfer_learning_data/many_communities/20181105_answers_all_columns_nthain.tfrecord'\n", "DATA_LABEL = 'removed'\n", "DATASET_TEXT_FEATURE='comment_text'\n", "\n", "# Pattern for path of tf_records\n", "OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'many_communities_test')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CNN, GRU Attention Models" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'tokens'\n", "MODEL_NAMES = [\n", " 'tf_cnn_many_communities_glove:v_20190219_185551_gpu_p100_4',\n", " #'tf_gru_attention_many_communities:v20190322_142800_507893_1556085643',\n", " #'tf_gru_attention_many_communities:v20190315_161037_23271_1555129264',\n", " 'tf_gru_attention_many_communities:v20190705_004839_507000_1562364428_gpu_p100_4',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", 
"test_input_fn = make_test_input_fn(\n", " DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, tokenizer, label_data_type=tf.int64)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n", "test_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 318, "metadata": {}, "outputs": [], "source": [ "many_communities_test_df = test_dataset.show_data()" ] }, { "cell_type": "code", "execution_count": 319, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_cnn_many_communities_glove:v_20190219_185551\n", "\tROC AUC: 0.7476941464055139\n", "\tPR AUC: 0.07604839414024091\n", "tf_gru_attention_many_communities:v20190315_161037_23271_1555129264\n", "\tROC AUC: 0.7215269560475308\n", "\tPR AUC: 0.06656538517176142\n" ] } ], "source": [ "print_results(many_communities_test_df, MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### TF-Hub Model" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'text'\n", "MODEL_NAMES = [\n", " 'tf_hub_classifier_many_communities:v20190219_185602_316000_1553563221_gpu_v100_4',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", 
"test_input_fn = make_test_input_fn(\n", " DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, None, label_data_type=tf.int64)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\n", "test_dataset.load_data(10000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "many_communities_tfhub_test_df = test_dataset.show_data()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print_results(many_communities_tfhub_test_df, MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate models on Many Communities subset (adapted for few-shot learning)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "LABEL_NAME_PREDICTION_MODEL = 'label/logistic'\n", "DATASET_VALID = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord'\n", "DATASET_TEST = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord'\n", "DATA_LABEL = 'label'\n", "DATASET_TEXT_FEATURE='text'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pessimistic" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Pattern for path of tf_records\n", "OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/valid')\n", "OUTPUT_DIR_TEST = 
os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/test')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### CNN, GRU Attention Models" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'tokens'\n", "MODEL_NAMES = [\n", " 'tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1',\n", " 'tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "valid_input_fn = make_test_input_fn(\n", " DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, tokenizer, label_data_type=tf.int64)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, tokenizer, label_data_type=tf.int64)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n", "valid_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n", "test_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are 
available.\n", "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1\n", "\tROC AUC: 0.8233381391772395\n", "\tPR AUC: 0.8062951511107903\n", "\tF1: 0.7607565011820331\n", "tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1\n", "\tROC AUC: 0.8303615196078432\n", "\tPR AUC: 0.8125045070656154\n", "\tF1: 0.7703703703703705\n" ] } ], "source": [ "print_results(valid_dataset.show_data(), MODEL_NAMES)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1\n", "\tROC AUC: 0.7981477681641835\n", "\tPR AUC: 0.7900106468171257\n", "\tF1: 0.7378091872791519\n", "tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1\n", "\tROC AUC: 0.8074846866462235\n", "\tPR AUC: 0.7951370231895221\n", "\tF1: 0.7507100720996286\n" ] } ], "source": [ "print_results(test_dataset.show_data(), MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### TF-Hub Model" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'text'\n", "MODEL_NAMES = [\n", " 'tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " 
example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "valid_input_fn = make_test_input_fn(\n", " DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, None, label_data_type=tf.int64)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, None, label_data_type=tf.int64)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n", "valid_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n", "test_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1\n", "\tROC AUC: 0.8612435121107267\n", "\tPR AUC: 0.851153195076283\n", "\tF1: 0.7937575030012005\n" ] } ], "source": [ "print_results(valid_dataset.show_data(), MODEL_NAMES)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1\n", "\tROC AUC: 0.8434673869262717\n", "\tPR AUC: 0.8326080326940988\n", "\tF1: 0.779380468195791\n" ] } ], "source": [ "print_results(test_dataset.show_data(), MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Optimistic" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Pattern for path of tf_records\n", "OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/valid')\n", "OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/test')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### CNN, GRU Attention Models" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'tokens'\n", "MODEL_NAMES = [\n", " 'tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1',\n", " 'tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "valid_input_fn = make_test_input_fn(\n", " DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, tokenizer, label_data_type=tf.int64)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, tokenizer, label_data_type=tf.int64)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", 
"random.seed(2018)\n", "\n", "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n", "valid_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n", "test_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1\n", "\tROC AUC: 0.8304709727028066\n", "\tPR AUC: 0.8191225889787218\n", "\tF1: 0.7564259485924112\n", "tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1\n", "\tROC AUC: 0.8293254998077663\n", "\tPR AUC: 0.8181913933482414\n", "\tF1: 0.7652214022140222\n" ] } ], "source": [ "print_results(valid_dataset.show_data(), MODEL_NAMES)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1\n", "\tROC AUC: 0.8043942295635125\n", "\tPR AUC: 0.79754755517453\n", "\tF1: 0.7305737109658679\n", "tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1\n", "\tROC AUC: 0.8156875904836816\n", "\tPR AUC: 0.8081941065311745\n", "\tF1: 0.7558876811594204\n" ] } ], "source": [ "print_results(test_dataset.show_data(), MODEL_NAMES)" ] 
}, { "cell_type": "markdown", "metadata": {}, "source": [ "#### TF-Hub Model" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "MODEL_TEXT_FEATURE = 'text'\n", "MODEL_NAMES = [\n", " 'tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1',\n", "]\n", "\n", "model_input_spec = {\n", " MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n", "}\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)\n", "\n", "valid_input_fn = make_test_input_fn(\n", " DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, None, label_data_type=tf.int64)\n", "\n", "test_input_fn = make_test_input_fn(\n", " DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\n", " DATA_LABEL, None, label_data_type=tf.int64)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\n", "valid_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Need to set seed before loading data to be able to reload same data in the future\n", "random.seed(2018)\n", "\n", "test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\n", "test_dataset.load_data(100000000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\n", "test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)" ] }, { "cell_type": "code", 
"execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1\n", "\tROC AUC: 0.8680750192233757\n", "\tPR AUC: 0.8623373414090059\n", "\tF1: 0.7900994904149479\n" ] } ], "source": [ "print_results(valid_dataset.show_data(), MODEL_NAMES)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1\n", "\tROC AUC: 0.8526337876041631\n", "\tPR AUC: 0.8481017558154519\n", "\tF1: 0.784984556901877\n" ] } ], "source": [ "print_results(test_dataset.show_data(), MODEL_NAMES)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Finetuned" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import csv\n", "import matplotlib.pyplot as plt\n", "from sklearn.utils import fixes" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def get_list_results_files(parent_dir):\n", " \"\"\"Gets the paths of all results files that are in parent_dir.\"\"\"\n", " file_list = []\n", " for subdirectory, _, files in tf.gfile.Walk(parent_dir):\n", " [file_list.append(os.path.join(parent_dir, fname)) for fname in files]\n", " return file_list" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "def load_csv_predictions(pred_file, is_test=False):\n", " \"\"\"Load the CSV file with predictions and labels.\"\"\"\n", " model_predictions = None\n", " labels = None\n", " communities = None\n", " names = ['label', 'pred', 'community']\n", " if is_test:\n", " names = ['community', 'label', 'pred']\n", " with file_io.FileIO(pred_file, 'r') as f:\n", " df = pd.read_csv(f, header=None, names=names)\n", " labels = df['label'].values\n", " 
model_predictions = df['pred'].values\n", " communities = df['community'].values\n", " return labels, model_predictions, communities" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "def plot_pr_curve(precisions, recalls, identifier=None):\n", " \"\"\"Plots the Precision/Recall curve.\n", " Args:\n", " precisions: Precisions at all score thresholds.\n", " recalls: Recalls at all score thresholds.\n", " identifier: Optional string indicating what this curve is.\n", " \"\"\"\n", " precision_recall_auc = metrics.auc(recalls, precisions)\n", " plt.figure()\n", " step_kwargs = ({\n", " 'step': 'post'\n", " } if 'step' in fixes.signature(plt.fill_between).parameters else {})\n", " plt.step(recalls, precisions, color='b', alpha=0.2, where='post')\n", " plt.fill_between(recalls, precisions, alpha=0.2, color='b', **step_kwargs)\n", " plt.xlabel('Recall')\n", " plt.ylabel('Precision')\n", " plt.ylim([0.0, 1.05])\n", " plt.xlim([0.0, 1])\n", " if identifier:\n", " plt.title('PR curve for %s (AUC = %.2f).' % (\n", " identifier, precision_recall_auc))\n", " else:\n", " plt.title('PR curve (AUC = %.2f).' 
% precision_recall_auc)\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "def compute_metrics_from_dir(results_dir, is_test=False):\n", " files = get_list_results_files(results_dir)\n", " for file_path in files:\n", " curr_trial_name = os.path.basename(file_path)\n", " print(curr_trial_name)\n", " labels, model_preds, communities = load_csv_predictions(file_path, is_test)\n", " fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)\n", " roc_auc = metrics.auc(fpr, tpr)\n", " precisions, recalls, thr = metrics.precision_recall_curve(labels, model_preds)\n", " pr_auc = metrics.auc(recalls, precisions)\n", " model_preds_binary = (model_preds > 0.5).astype(np.int_)\n", " f1 = metrics.f1_score(labels, model_preds_binary)\n", " print('\\tROC AUC: {}'.format(roc_auc))\n", " print('\\tPR AUC: {}'.format(pr_auc))\n", " print('\\tF1: {}'.format(f1))\n", " plot_pr_curve(precisions, recalls, curr_trial_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Validation" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "TF_CNN_VALID_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/validation\"\n", "TF_GRU_VALID_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/validation\"\n", "TF_HUB_VALID_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/validation\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "compute_metrics_from_dir(TF_CNN_VALID_RESULTS_DIR)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "compute_metrics_from_dir(TF_GRU_VALID_RESULTS_DIR)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, 
"outputs": [], "source": [ "compute_metrics_from_dir(TF_HUB_VALID_RESULTS_DIR)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Test" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "TF_CNN_TEST_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/test\"\n", "TF_GRU_TEST_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/test\"\n", "TF_HUB_TEST_RESULTS_DIR = \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/test\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "compute_metrics_from_dir(TF_CNN_TEST_RESULTS_DIR, is_test=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "compute_metrics_from_dir(TF_GRU_TEST_RESULTS_DIR, is_test=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "compute_metrics_from_dir(TF_HUB_TEST_RESULTS_DIR)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: model_evaluation/input_fn_example.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Defines some examples of input_fn for the evaluation notebook.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import numpy as np import pandas as pd import pkg_resources import os import random import re import tensorflow as tf from tensorflow.python.lib.io import file_io from unintended_ml_bias import model_bias_analysis from utils_export import utils_tfrecords #Faster to access GCS file + https://github.com/tensorflow/tensorflow/issues/15530 os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0' #TODO(fprost): Clean this file. 
#### #### #### #### #### #### #### PERFORMANCE DATASET #### #### #### #### #### #### #### def create_input_fn_toxicity_performance(tokenizer, model_input_comment_field): """Generates an input_fn to evaluate model performance on toxicity dataset.""" TOXICITY_PERFORMANCE_DATASET = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord' TOXICITY_DATA_LABEL = 'frac_neg' #Name of the label in the dataset TOXICITY_COMMENT_NAME = 'comment_text' #Name of the comment in the dataset # DECODING decoding_input_features = { TOXICITY_COMMENT_NAME: tf.FixedLenFeature([], dtype=tf.string), TOXICITY_DATA_LABEL: tf.FixedLenFeature([], dtype=tf.float32) } def input_fn_performance_toxicity(max_n_examples=None, random_filter_keep_rate=1.0): res = utils_tfrecords.decode_tf_records_to_pandas( decoding_input_features, TOXICITY_PERFORMANCE_DATASET, max_n_examples, random_filter_keep_rate) res[model_input_comment_field] = list( map(tokenizer, res[TOXICITY_COMMENT_NAME])) res = res.rename(columns={TOXICITY_DATA_LABEL: 'label'}) res['label'] = list(map(lambda x: bool(round(x)), list(res['label']))) final = res.copy(deep=True) return final return input_fn_performance_toxicity #### #### #### #### #### #### CIVIL DATASET #### #### #### #### #### #### CIVIL_COMMENTS_PATH = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/eval-00000-of-00003.tfrecord' THRESHOLD_BIAS_CIVIL = 0.5 civil_comments_spec = { 'comment_text': tf.FixedLenFeature([], dtype=tf.string), 'id': tf.FixedLenFeature([], dtype=tf.string), 'toxicity': tf.FixedLenFeature([], dtype=tf.float32), 'severe_toxicity': tf.FixedLenFeature([], dtype=tf.float32), 'obscene': tf.FixedLenFeature([], dtype=tf.float32), 'sexual_explicit': tf.FixedLenFeature([], dtype=tf.float32), 'identity_attack': tf.FixedLenFeature([], dtype=tf.float32), 'insult': tf.FixedLenFeature([], dtype=tf.float32), 'threat': tf.FixedLenFeature([], dtype=tf.float32), 'toxicity_annotator_count': tf.FixedLenFeature([], 
dtype=tf.int64), 'identity_annotator_count': tf.FixedLenFeature([], dtype=tf.int64), 'male': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'female': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'transgender': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'other_gender': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'heterosexual': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'homosexual_gay_or_lesbian': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'bisexual': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'other_sexual_orientation': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'christian': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'jewish': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'muslim': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'hindu': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'buddhist': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'atheist': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'other_religion': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'black': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'white': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'asian': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'latino': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'other_race_or_ethnicity': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'physical_disability': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'intellectual_or_learning_disability': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'psychiatric_or_mental_illness': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), 'other_disability': tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.), } identity_terms_civil = [ 'male', 'female', 
'transgender', 'other_gender', 'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual', 'other_sexual_orientation', 'christian', 'jewish', 'muslim', 'hindu', 'buddhist', 'atheist', 'other_religion', 'black', 'white', 'asian', 'latino', 'other_race_or_ethnicity', 'physical_disability', 'intellectual_or_learning_disability', 'psychiatric_or_mental_illness', 'other_disability' ] CIVIL_COMMENT_NAME = 'comment_text' def create_input_fn_civil_performance(tokenizer, model_input_comment_field): """Generates an input_fn to evaluate model performance on civil dataset.""" def input_fn_performance_civil(max_n_examples=None, random_filter_keep_rate=1.0): civil_df_raw = utils_tfrecords.decode_tf_records_to_pandas( civil_comments_spec, CIVIL_COMMENTS_PATH, max_n_examples=max_n_examples, random_filter_keep_rate=random_filter_keep_rate, ) civil_df_raw[CIVIL_COMMENT_NAME] = list( map(tokenizer, civil_df_raw[CIVIL_COMMENT_NAME])) civil_df_raw['toxicity'] = list( map(lambda x: bool(round(x)), list(civil_df_raw['toxicity']))) civil_df_raw = civil_df_raw.rename(columns={ CIVIL_COMMENT_NAME: model_input_comment_field, 'toxicity': 'label' }) res = civil_df_raw.copy(deep=True) return res return input_fn_performance_civil def create_input_fn_civil_bias(tokenizer, model_input_comment_field): """"Generates an input_fn to evaluate model bias on civil dataset. Construction of this database such as: We keep only examples that have identity labels (with rule: male >=0). We apply the 'threshold_bias_civil' for each identity field. We select x% of the "background", i.e. examples that are 0 for each identify. Indeed, as the background is dominant, we want to reduce the size of the test set. 
""" def filter_fn_civil(example, background_filter_keep_rate=0.1): if example['male'] < 0.: return False contains_one_identity = False for _term in identity_terms_civil: if example[_term] >= THRESHOLD_BIAS_CIVIL: contains_one_identity = True if contains_one_identity: return True else: return (random.random() < background_filter_keep_rate) def input_fn_bias_civil(max_n_examples=None): civil_df_raw = utils_tfrecords.decode_tf_records_to_pandas( civil_comments_spec, CIVIL_COMMENTS_PATH, max_n_examples=max_n_examples, filter_fn=filter_fn_civil, ) civil_df_raw[CIVIL_COMMENT_NAME] = list( map(tokenizer, civil_df_raw[CIVIL_COMMENT_NAME])) for _term in identity_terms_civil: civil_df_raw[_term] = list( map(lambda x: x >= THRESHOLD_BIAS_CIVIL, list(civil_df_raw[_term]))) civil_df_raw['toxicity'] = list( map(lambda x: bool(round(x)), list(civil_df_raw['toxicity']))) civil_df_raw = civil_df_raw.rename(columns={ CIVIL_COMMENT_NAME: model_input_comment_field, 'toxicity': 'label' }) res = civil_df_raw.copy(deep=True) return res return input_fn_bias_civil #### #### #### #### #### #### #### SYNTHETIC DATASET #### #### #### #### #### #### #### def create_input_fn_artificial_bias(tokenizer, model_input_comment_field): """Generates an input_fn to evaluate model bias on synthetic dataset.""" def input_fn_bias(max_n_examples): # Loading it from it the unintended_ml_bias github. 
entire_test_bias_df = pd.read_csv( pkg_resources.resource_stream('unintended_ml_bias', 'eval_datasets/bias_madlibs_77k.csv')) entire_test_bias_df['raw_text'] = entire_test_bias_df['Text'] entire_test_bias_df['label'] = entire_test_bias_df['Label'] entire_test_bias_df['label'] = list( map(lambda x: x == 'BAD', entire_test_bias_df['label'])) entire_test_bias_df = entire_test_bias_df[['raw_text', 'label']].copy() identity_terms_synthetic = [ line.strip() for line in pkg_resources.resource_stream( 'unintended_ml_bias', 'bias_madlibs_data/adjectives_people.txt') ] model_bias_analysis.add_subgroup_columns_from_text( entire_test_bias_df, 'raw_text', identity_terms_synthetic) # Add preprocessing entire_test_bias_df['text'] = list( map(tokenizer, entire_test_bias_df['raw_text'])) if max_n_examples: res = entire_test_bias_df.sample(n=max_n_examples, random_state=2018) else: res = entire_test_bias_df res = res.copy(deep=True) res = res.rename(columns={'raw_text': model_input_comment_field}) return res return input_fn_bias #### #### #### #### #### #### #### BIASBIOS DATASET #### #### #### #### #### #### #### BIASBIOS_PATH = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190225173815/test*.tfrecord' SCRUBBED_BIASBIOS_PATH = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190225173815_scrubbed/test*.tfrecord' comments_spec = { 'comment_text': tf.FixedLenFeature([], dtype=tf.string), 'gender': tf.FixedLenFeature([], dtype=tf.string), 'title': tf.FixedLenFeature([], dtype=tf.int64) } identity_terms = [ 'gender' ] COMMENT_NAME = 'comment_text' LABEL_NAME = 'title' def create_input_fn_biasbios(tokenizer, model_input_comment_field, scrubbed=False): """"Generates an input_fn to evaluate model bias on biasbios dataset. 
""" def filter_fn_biasbios(example, background_filter_keep_rate=1.0): return (random.random() < background_filter_keep_rate) def input_fn_biasbios(max_n_examples=None, random_filter_keep_rate=1.0): if scrubbed: path = SCRUBBED_BIASBIOS_PATH else: path = BIASBIOS_PATH df_raw = utils_tfrecords.decode_tf_records_to_pandas( comments_spec, path, max_n_examples=max_n_examples, filter_fn=filter_fn_biasbios, ) df_raw[COMMENT_NAME] = list( map(tokenizer, df_raw[COMMENT_NAME])) #for _term in identity_terms: # df_raw[_term] = list(df_raw[_term]) #df_raw[LABEL_NAME] = list(df_raw[LABEL_NAME]) df_raw = df_raw.rename(columns={ COMMENT_NAME: model_input_comment_field, LABEL_NAME: 'label' }) res = df_raw.copy(deep=True) return res return input_fn_biasbios #### #### #### #### #### #### #### SYNTHETIC DATASET #### #### #### #### #### #### #### def create_input_fn_artificial_bias(tokenizer, model_input_comment_field): """Generates an input_fn to evaluate model bias on synthetic dataset.""" def input_fn_bias(max_n_examples): # Loading it from it the unintended_ml_bias github. 
entire_test_bias_df = pd.read_csv( pkg_resources.resource_stream('unintended_ml_bias', 'eval_datasets/bias_madlibs_77k.csv')) entire_test_bias_df['raw_text'] = entire_test_bias_df['Text'] entire_test_bias_df['label'] = entire_test_bias_df['Label'] entire_test_bias_df['label'] = list( map(lambda x: x == 'BAD', entire_test_bias_df['label'])) entire_test_bias_df = entire_test_bias_df[['raw_text', 'label']].copy() identity_terms_synthetic = [ line.strip() for line in pkg_resources.resource_stream( 'unintended_ml_bias', 'bias_madlibs_data/adjectives_people.txt') ] model_bias_analysis.add_subgroup_columns_from_text( entire_test_bias_df, 'raw_text', identity_terms_synthetic) # Add preprocessing entire_test_bias_df['text'] = list( map(tokenizer, entire_test_bias_df['raw_text'])) if max_n_examples: res = entire_test_bias_df.sample(n=max_n_examples, random_state=2018) else: res = entire_test_bias_df res = res.copy(deep=True) res = res.rename(columns={'raw_text': model_input_comment_field}) return res return input_fn_bias ================================================ FILE: model_evaluation/jigsaw_evaluation_pipeline.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-YibCLoSLRHp" }, "source": [ "Copyright 2018 Google LLC.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\");\n", "you may not use this file except in compliance with the License.\n", "You may obtain a copy of the License at\n", "\n", "https://www.apache.org/licenses/LICENSE-2.0\n", "\n", "Unless required by applicable law or agreed to in writing, software\n", "distributed under the License is distributed on an \"AS IS\" BASIS,\n", "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "See the License for the specific language governing permissions and\n", "limitations under the License." 
] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "LMykUGMauh9b" }, "source": [ "# Evaluation code\n", "\n", "\n", "__Disclaimer__\n", "* This notebook contains experimental code, which may be changed without notice.\n", "* The ideas here are some ideas relevant to fairness - they are not the whole story!\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Notebook summary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook intends to evaluate a list of models on two dimensions:\n", "- \"Performance\": How well the model performs at classifying the data (intended bias). Currently, we use the AUC.\n", "- \"Bias\": How much bias the model contains (unintended bias). Currently, we use the pinned AUC.\n", "\n", "This script takes the following steps:\n", "\n", "- Defines the models to evaluate and specifies their signature (expected inputs/outputs).\n", "- Writes input functions to generate 2 datasets:\n", " - A \"performance dataset\" which will be used for the first set of metrics. This dataset is supposed to have a similar format to the training data (containing a piece of text and a label).\n", " - A \"bias dataset\" which will be used for the second set of metrics. This data contains a piece of text, a label but also some subgroup information to evaluate the unintended bias on.\n", "- Runs predictions with the export_utils.\n", "- Evaluates metrics." 
] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "\n", "import getpass\n", "from IPython.display import display\n", "import json\n", "import nltk\n", "import numpy as np\n", "import pandas as pd\n", "import pkg_resources\n", "import os\n", "import random\n", "import re\n", "import seaborn as sns\n", "\n", "import tensorflow as tf\n", "from tensorflow.python.lib.io import file_io" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#from google.colab import auth\n", "#auth.authenticate_user()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#!pip install -U -q git+https://github.com/conversationai/unintended-ml-bias-analysis" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from unintended_ml_bias import model_bias_analysis" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [], "source": [ "import input_fn_example\n", "from utils_export.dataset import Dataset, Model\n", "from utils_export import utils_cloudml\n", "from utils_export import utils_tfrecords" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0' #Faster to access GCS file + https://github.com/tensorflow/tensorflow/issues/15530" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[nltk_data] Downloading package punkt to /Users/nthain/nltk_data...\n", "[nltk_data] Package punkt 
is already up-to-date!\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nltk.download('punkt')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Settings" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Global variables" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# User inputs\n", "PROJECT_NAME = 'conversationai-models'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Part 1: Defining your model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "An important user input is the description of the deployed models that are evaluated.\n", "\n", "1- Defining which model will be used.\n", "$MODEL_NAMES defined the different names (format: \"model_name:version\").\n", "\n", "2- Defining the model signature.\n", "Currently, the `Dataset` API does not detect the signature of a CMLE model, so this information is given by a `Model` instance.\n", "You need to describe:\n", "- input_spec: what the input_file should be (argument `feature_keys_spec`). It is a dictionary which describes the name of the fields and their types.\n", "- prediction_keys (argument `prediction_keys`). It is the name of the prediction field in the model output.\n", "- Name of the example key (argument `example_key`). A unique identifier for each sentence which will be generated by the dataset API (a.k.a. your input data does not need to have this field).\n", " - When using Cloud MLE for batch predictions, data is processed in an unpredictable order. To be able to match the returned predictions with your input instances, you must have instance keys defined." 
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# User inputs:\n", "MODEL_NAMES = [\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738', # ??\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748', # ??\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820', # ??\n", " 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828', # ??\n", "]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# User inputs: Model description (see above for more info).\n", "TEXT_FEATURE_NAME = 'tokens' #Input defined in serving function called in run.py (arg: `text_feature_name`).\n", "SENTENCE_KEY = 'comment_key' #Input key defined in serving functioncalled in run.py (arg: `example_key_name`).\n", "#LABEL_NAME_PREDICTION_MODEL = 'scores' # Output prediction: typically $label_name/logistic\n", "LABEL_NAME_PREDICTION_MODEL = 'probabilities' # Output prediction: typically $label_name/logistic" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "model_input_spec = {\n", " TEXT_FEATURE_NAME: utils_tfrecords.EncodingFeatureSpec.LIST_STRING} #library will use this automatically\n", "\n", "model = Model(\n", " feature_keys_spec=model_input_spec,\n", " prediction_keys=LABEL_NAME_PREDICTION_MODEL,\n", " example_key=SENTENCE_KEY,\n", " model_names=MODEL_NAMES,\n", " project_name=PROJECT_NAME)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Part 2: Defining the input_fn" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "def tokenizer(text, lowercase=True):\n", " \"\"\"Converts text to a list of words.\n", "\n", " Args:\n", " text: piece of text to tokenize (string).\n", " lowercase: whether to include lowercasing in preprocessing (boolean).\n", " tokenizer: Python function to tokenize the text on.\n", "\n", " 
Returns:\n", " A list of strings (words).\n", " \"\"\"\n", " words = nltk.word_tokenize(text.decode('utf-8'))\n", " if lowercase:\n", " words = [w.lower() for w in words]\n", " return words" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Defining input_fn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We first need to define an input_fn which will be fed to the `Dataset` API.\n", "An input_fn must meet the following requirements:\n", "- Returns a pandas DataFrame.\n", "- Has an argument 'max_n_examples' to control the size of the dataframe.\n", "- The DataFrame contains at least a field $TEXT_FEATURE_NAME, which maps to a tokenized text (list of words) AND a field 'label' which is 1 for toxic (0 otherwise).\n", "\n", "We will define two different input_fn (1 for performance, 1 for bias). The bias input_fn should also contain identity information.\n", "\n", "Note: You can use ANY input_fn that matches those requirements. You can find a few examples of input_fn in the file input_fn_example.py (for toxicity and civil_comments datasets)." 
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# User inputs: Choose which one you want to use OR create your own!\n", "INPUT_FN_PERFORMANCE = input_fn_example.create_input_fn_biasbios(\n", " tokenizer,\n", " model_input_comment_field=TEXT_FEATURE_NAME,\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Part 3: Running prediction" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Performance dataset" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# User inputs\n", "SIZE_PERFORMANCE_DATA_SET = 10000" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gs://conversationai-models/nthain/tfrecords/performance_dataset_dir\n" ] } ], "source": [ "# Pattern for path of tf_records\n", "PERFORMANCE_DATASET_DIR = os.path.join(\n", " 'gs://conversationai-models/',\n", " getpass.getuser(),\n", " 'tfrecords',\n", " 'performance_dataset_dir')\n", "print(PERFORMANCE_DATASET_DIR)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:input_fn is compatible with the `Dataset` class.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/tensorflow/python/client/session.py:1711: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).\n", " warnings.warn('An interactive session is already active. 
This can '\n" ] } ], "source": [ "dataset_performance = Dataset(INPUT_FN_PERFORMANCE, PERFORMANCE_DATASET_DIR)\n", "random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future\n", "dataset_performance.load_data(SIZE_PERFORMANCE_DATA_SET, random_filter_keep_rate=0.5)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tokensgenderlabel
0[in, her, role, ,, she, is, a, member, of, an,...F17
1[his, blog, www.donaldhtaylorjr.blogspot.com, ...M25
2[he, has, primarily, reported, for, the, atlan...M12
3[andrea, 's, area, of, expertise, is, in, whol...F25
4[dr., milane, was, trained, as, a, national, c...F25
5[he, is, also, visiting, associate, professor,...M25
6[her, research, focuses, on, the, trafficking,...F25
7[he, has, been, licensed, to, practice, law, i...M3
8[after, a, two-year, postdoctoral, fellowship,...M25
9[prior, to, teaching, ,, she, was, an, account...F31
10[jackie, 's, works, are, published, in, academ...F25
11[her, research, topic, was, the, investigation...F25
12[she, graduated, with, honors, in, 2012, ., ha...F17
13[his, research, focuses, on, the, japan, air, ...M25
14[she, directed, the, 2014, peabody, award-winn...F10
15[he, lends, his, exceptional, surgical, skills...M30
16[he, teaches, courses, ranging, from, core, un...M25
17[her, major, fields, of, interest, are, develo...F25
18[dr., cole, honors, several, insurance, carrie...M23
19[she, practices, in, the, areas, of, business,...F3
20[she, has, obtained, her, phd, in, eu, law, fr...F25
21[his, photographs, are, reminiscent, of, silho...M22
22[he, earned, his, ph.d., at, the, university, ...M25
23[his, inter-, disciplinary, research, interest...M25
24[she, earned, her, ph.d., in, communication, s...F25
25[his, current, projects, examine, intergenerat...M25
26[he, has, served, as, an, expert, witness, in,...M0
27[she, 's, called, in, some, of, the, parent, o...F31
28[nneka, has, recently, become, interested, in,...F3
29[she, writes, regularly, for, faith, and, lead...F20
............
9970[he, was, previously, an, assistant, professor...M25
9971[aside, from, filmmaking, ,, he, ’, s, an, avi...M10
9972[he, lives, in, dallas, with, his, wife, and, ...M29
9973[he, exhibited, in, institutions, like, kultur...M22
9974[he, has, represented, numerous, municipalitie...M3
9975[his, works, include, portrait, ,, glamour, an...M22
9976[he, began, using, haskell, during, his, senio...M29
9977[he, has, been, involved, with, streaming, med...M2
9978[he, has, also, produced, lecture, courses, fo...M25
9979[after, completing, her, degrees, at, the, uni...F23
9980[this, is, a, slightly, edited, version, of, h...F12
9981[she, received, her, b.sc, ., in, nutrition, f...F8
9982[she, is, the, author, of, pelo, bueno, y, otr...F24
9983[she, obtained, her, bachelor, of, science, de...F23
9984[dr., kanchan, singh, practices, at, singh, de...M30
9985[prior, to, joining, fresh, 'n, fit, cuisine, ...F8
9986[he, worked, on, staff, at, aopa, pilot, magaz...M12
9987[he, started, working, on, these, themes, duri...M18
9988[his, research, aims, to, understand, the, con...M25
9989[he, received, the, ph.d., degree, in, measuri...M25
9990[he, currently, practices, at, johns, hopkins,...M30
9991[she, received, her, m.a, ., in, secondary, ed...F31
9992[his, research, interests, lie, in, the, study...M25
9993[she, graduated, with, honors, in, 2000, ., ha...F26
9994[chris, primarily, teaches, anatomy, and, phys...M31
9995[always, responsive, to, the, specific, geogra...F2
9996[he, has, worked, on, numerous, projects, that...M29
9997[he, graduated, from, the, academy, of, visual...M22
9998[most, of, his, writing, is, from, the, middle...M12
9999[he, is, currently, on, the, good, news, poetr...M24
\n", "

10000 rows × 3 columns

\n", "
" ], "text/plain": [ " tokens gender label\n", "0 [in, her, role, ,, she, is, a, member, of, an,... F 17\n", "1 [his, blog, www.donaldhtaylorjr.blogspot.com, ... M 25\n", "2 [he, has, primarily, reported, for, the, atlan... M 12\n", "3 [andrea, 's, area, of, expertise, is, in, whol... F 25\n", "4 [dr., milane, was, trained, as, a, national, c... F 25\n", "5 [he, is, also, visiting, associate, professor,... M 25\n", "6 [her, research, focuses, on, the, trafficking,... F 25\n", "7 [he, has, been, licensed, to, practice, law, i... M 3\n", "8 [after, a, two-year, postdoctoral, fellowship,... M 25\n", "9 [prior, to, teaching, ,, she, was, an, account... F 31\n", "10 [jackie, 's, works, are, published, in, academ... F 25\n", "11 [her, research, topic, was, the, investigation... F 25\n", "12 [she, graduated, with, honors, in, 2012, ., ha... F 17\n", "13 [his, research, focuses, on, the, japan, air, ... M 25\n", "14 [she, directed, the, 2014, peabody, award-winn... F 10\n", "15 [he, lends, his, exceptional, surgical, skills... M 30\n", "16 [he, teaches, courses, ranging, from, core, un... M 25\n", "17 [her, major, fields, of, interest, are, develo... F 25\n", "18 [dr., cole, honors, several, insurance, carrie... M 23\n", "19 [she, practices, in, the, areas, of, business,... F 3\n", "20 [she, has, obtained, her, phd, in, eu, law, fr... F 25\n", "21 [his, photographs, are, reminiscent, of, silho... M 22\n", "22 [he, earned, his, ph.d., at, the, university, ... M 25\n", "23 [his, inter-, disciplinary, research, interest... M 25\n", "24 [she, earned, her, ph.d., in, communication, s... F 25\n", "25 [his, current, projects, examine, intergenerat... M 25\n", "26 [he, has, served, as, an, expert, witness, in,... M 0\n", "27 [she, 's, called, in, some, of, the, parent, o... F 31\n", "28 [nneka, has, recently, become, interested, in,... F 3\n", "29 [she, writes, regularly, for, faith, and, lead... F 20\n", "... ... ... ...\n", "9970 [he, was, previously, an, assistant, professor... 
M 25\n", "9971 [aside, from, filmmaking, ,, he, ’, s, an, avi... M 10\n", "9972 [he, lives, in, dallas, with, his, wife, and, ... M 29\n", "9973 [he, exhibited, in, institutions, like, kultur... M 22\n", "9974 [he, has, represented, numerous, municipalitie... M 3\n", "9975 [his, works, include, portrait, ,, glamour, an... M 22\n", "9976 [he, began, using, haskell, during, his, senio... M 29\n", "9977 [he, has, been, involved, with, streaming, med... M 2\n", "9978 [he, has, also, produced, lecture, courses, fo... M 25\n", "9979 [after, completing, her, degrees, at, the, uni... F 23\n", "9980 [this, is, a, slightly, edited, version, of, h... F 12\n", "9981 [she, received, her, b.sc, ., in, nutrition, f... F 8\n", "9982 [she, is, the, author, of, pelo, bueno, y, otr... F 24\n", "9983 [she, obtained, her, bachelor, of, science, de... F 23\n", "9984 [dr., kanchan, singh, practices, at, singh, de... M 30\n", "9985 [prior, to, joining, fresh, 'n, fit, cuisine, ... F 8\n", "9986 [he, worked, on, staff, at, aopa, pilot, magaz... M 12\n", "9987 [he, started, working, on, these, themes, duri... M 18\n", "9988 [his, research, aims, to, understand, the, con... M 25\n", "9989 [he, received, the, ph.d., degree, in, measuri... M 25\n", "9990 [he, currently, practices, at, johns, hopkins,... M 30\n", "9991 [she, received, her, m.a, ., in, secondary, ed... F 31\n", "9992 [his, research, interests, lie, in, the, study... M 25\n", "9993 [she, graduated, with, honors, in, 2000, ., ha... F 26\n", "9994 [chris, primarily, teaches, anatomy, and, phys... M 31\n", "9995 [always, responsive, to, the, specific, geogra... F 2\n", "9996 [he, has, worked, on, numerous, projects, that... M 29\n", "9997 [he, graduated, from, the, academy, of, visual... M 22\n", "9998 [most, of, his, writing, is, from, the, middle... M 12\n", "9999 [he, is, currently, on, the, good, news, poetr... 
M 24\n", "\n", "[10000 rows x 3 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_performance.show_data()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(10000, 3)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_performance.show_data().shape" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index([u'tokens', u'gender', u'label'], dtype='object')" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_performance.show_data().columns" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "CLASS_NAMES = range(33)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "features {\n", " feature {\n", " key: \"comment_text\"\n", " value {\n", " bytes_list {\n", " value: \" In her role, she is a member of an innovative team-based care model which has been recognized by Wall Street Journal and the Robert Wood Johnson Foundation. A process improvement leader with a passion for serving vulnerable populations, Amberly was recognized by her colleagues with the first Daisy Award for Extraordinary Nurses at Cambridge Health Alliance. Amberly holds a BS in Nursing from Valparaiso University and a Masters in Public Health from the University of Massachusetts Amherst. 
read more\"\n", " }\n", " }\n", " }\n", " feature {\n", " key: \"gender\"\n", " value {\n", " bytes_list {\n", " value: \"F\"\n", " }\n", " }\n", " }\n", " feature {\n", " key: \"title\"\n", " value {\n", " int64_list {\n", " value: 17\n", " }\n", " }\n", " }\n", "}\n", "\n" ] } ], "source": [ "INPUT_DATA = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190220165938/eval-00000-of-00003.tfrecord'\n", "record_iterator = tf.python_io.tf_record_iterator(path=INPUT_DATA)\n", "string_record = next(record_iterator)\n", "example = tf.train.Example()\n", "example.ParseFromString(string_record)\n", "text = example.features.feature\n", "print(example)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Model is compatible with the `Dataset` instance.\n", "WARNING:tensorflow:Using past predictions. the data must match exactly (same number of lines and same order).\n" ] } ], "source": [ "# Set recompute_predictions=False to save time if predictions are available.\n", "dataset_performance.add_model_prediction_to_data(model, recompute_predictions=False, class_names=CLASS_NAMES)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gs://conversationai-models/nthain/tfrecords/performance_dataset_dir/prediction_data_tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738\n", "33\n" ] } ], "source": [ "def _load_predictions(pred_file):\n", " with file_io.FileIO(pred_file, 'r') as f:\n", " # prediction file needs to fit in memory.\n", " try:\n", " predictions = [json.loads(line) for line in f]\n", " except:\n", " predictions = []\n", " return predictions\n", "\n", "model_name_tmp = MODEL_NAMES[0]\n", "prediction_file = dataset_performance.get_path_prediction(model_name_tmp)\n", "print(prediction_file)\n", "prediction_file = os.path.join(prediction_file,\n", " 
'prediction.results-00000-of-00001')\n", "print(len(_load_predictions(prediction_file)[0]['probabilities']))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Post processing" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "test_performance_df = dataset_performance.show_data()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "test_bias_df = test_performance_df.copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Analyzing final results" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 204 }, "colab_type": "code", "executionInfo": { "elapsed": 17, "status": "ok", "timestamp": 1530641283264, "user": { "displayName": "Flavien Prost", "photoUrl": "//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg", "userId": "100080410554240838905" }, "user_tz": 240 }, "id": "Y7R4heIB5GaV", "outputId": "e8e0c3bc-96d8-4635-865a-275052054df8" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tokensgenderlabeltf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6...tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32
0[in, her, role, ,, she, is, a, member, of, an,...F170.0016871.814099e-110.0026810.0098530.0042270.0557160.003005...0.0033510.0135610.0020400.0016824.412969e-046.086852e-170.0016060.0013790.0146350.000032
1[his, blog, www.donaldhtaylorjr.blogspot.com, ...M250.0147742.716771e-130.0054960.0223470.0038450.0844800.000096...0.0103090.0010550.0010620.0062059.439933e-075.250679e-180.0012040.0001500.0152520.000779
2[he, has, primarily, reported, for, the, atlan...M120.0167798.870694e-160.0016880.0713430.0005600.0298230.000032...0.0187670.0222920.0775980.0339798.196229e-053.315851e-110.0073130.0025650.1181670.001603
3[andrea, 's, area, of, expertise, is, in, whol...F250.0177421.019689e-150.0171500.0520850.0020970.0523220.002627...0.0015800.1454620.0006370.0003373.909138e-041.304484e-210.0115150.0009220.0298670.000001
4[dr., milane, was, trained, as, a, national, c...F250.0155311.783027e-120.1962270.0164710.0026900.0000400.001384...0.0134450.0037540.2200900.0812327.920414e-052.406181e-130.1508170.0149130.0716320.000142
\n", "

5 rows × 135 columns

\n", "
" ], "text/plain": [ " tokens gender label \\\n", "0 [in, her, role, ,, she, is, a, member, of, an,... F 17 \n", "1 [his, blog, www.donaldhtaylorjr.blogspot.com, ... M 25 \n", "2 [he, has, primarily, reported, for, the, atlan... M 12 \n", "3 [andrea, 's, area, of, expertise, is, in, whol... F 25 \n", "4 [dr., milane, was, trained, as, a, national, c... F 25 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0 \\\n", "0 0.001687 \n", "1 0.014774 \n", "2 0.016779 \n", "3 0.017742 \n", "4 0.015531 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1 \\\n", "0 1.814099e-11 \n", "1 2.716771e-13 \n", "2 8.870694e-16 \n", "3 1.019689e-15 \n", "4 1.783027e-12 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2 \\\n", "0 0.002681 \n", "1 0.005496 \n", "2 0.001688 \n", "3 0.017150 \n", "4 0.196227 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3 \\\n", "0 0.009853 \n", "1 0.022347 \n", "2 0.071343 \n", "3 0.052085 \n", "4 0.016471 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4 \\\n", "0 0.004227 \n", "1 0.003845 \n", "2 0.000560 \n", "3 0.002097 \n", "4 0.002690 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5 \\\n", "0 0.055716 \n", "1 0.084480 \n", "2 0.029823 \n", "3 0.052322 \n", "4 0.000040 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6 \\\n", "0 0.003005 \n", "1 0.000096 \n", "2 0.000032 \n", "3 0.002627 \n", "4 0.001384 \n", "\n", " ... \\\n", "0 ... \n", "1 ... \n", "2 ... \n", "3 ... \n", "4 ... 
\n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23 \\\n", "0 0.003351 \n", "1 0.010309 \n", "2 0.018767 \n", "3 0.001580 \n", "4 0.013445 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24 \\\n", "0 0.013561 \n", "1 0.001055 \n", "2 0.022292 \n", "3 0.145462 \n", "4 0.003754 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25 \\\n", "0 0.002040 \n", "1 0.001062 \n", "2 0.077598 \n", "3 0.000637 \n", "4 0.220090 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26 \\\n", "0 0.001682 \n", "1 0.006205 \n", "2 0.033979 \n", "3 0.000337 \n", "4 0.081232 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27 \\\n", "0 4.412969e-04 \n", "1 9.439933e-07 \n", "2 8.196229e-05 \n", "3 3.909138e-04 \n", "4 7.920414e-05 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28 \\\n", "0 6.086852e-17 \n", "1 5.250679e-18 \n", "2 3.315851e-11 \n", "3 1.304484e-21 \n", "4 2.406181e-13 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29 \\\n", "0 0.001606 \n", "1 0.001204 \n", "2 0.007313 \n", "3 0.011515 \n", "4 0.150817 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30 \\\n", "0 0.001379 \n", "1 0.000150 \n", "2 0.002565 \n", "3 0.000922 \n", "4 0.014913 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31 \\\n", "0 0.014635 \n", "1 0.015252 \n", "2 0.118167 \n", "3 0.029867 \n", "4 0.071632 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32 \n", "0 0.000032 \n", "1 0.000779 \n", "2 0.001603 \n", "3 0.000001 \n", "4 0.000142 \n", "\n", "[5 rows x 135 columns]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_performance_df.head()" ] }, { "cell_type": "code", "execution_count": 29, 
"metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 233 }, "colab_type": "code", "executionInfo": { "elapsed": 41, "status": "ok", "timestamp": 1530641286091, "user": { "displayName": "Flavien Prost", "photoUrl": "//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg", "userId": "100080410554240838905" }, "user_tz": 240 }, "id": "Ln2BXOg4Q6GP", "outputId": "bb5288e8-9f10-4796-b36e-42f5c02cb148" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tokensgenderlabeltf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6...tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32
0[in, her, role, ,, she, is, a, member, of, an,...F170.0016871.814099e-110.0026810.0098530.0042270.0557160.003005...0.0033510.0135610.0020400.0016824.412969e-046.086852e-170.0016060.0013790.0146350.000032
1[his, blog, www.donaldhtaylorjr.blogspot.com, ...M250.0147742.716771e-130.0054960.0223470.0038450.0844800.000096...0.0103090.0010550.0010620.0062059.439933e-075.250679e-180.0012040.0001500.0152520.000779
2[he, has, primarily, reported, for, the, atlan...M120.0167798.870694e-160.0016880.0713430.0005600.0298230.000032...0.0187670.0222920.0775980.0339798.196229e-053.315851e-110.0073130.0025650.1181670.001603
3[andrea, 's, area, of, expertise, is, in, whol...F250.0177421.019689e-150.0171500.0520850.0020970.0523220.002627...0.0015800.1454620.0006370.0003373.909138e-041.304484e-210.0115150.0009220.0298670.000001
4[dr., milane, was, trained, as, a, national, c...F250.0155311.783027e-120.1962270.0164710.0026900.0000400.001384...0.0134450.0037540.2200900.0812327.920414e-052.406181e-130.1508170.0149130.0716320.000142
\n", "

5 rows × 135 columns

\n", "
" ], "text/plain": [ " tokens gender label \\\n", "0 [in, her, role, ,, she, is, a, member, of, an,... F 17 \n", "1 [his, blog, www.donaldhtaylorjr.blogspot.com, ... M 25 \n", "2 [he, has, primarily, reported, for, the, atlan... M 12 \n", "3 [andrea, 's, area, of, expertise, is, in, whol... F 25 \n", "4 [dr., milane, was, trained, as, a, national, c... F 25 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0 \\\n", "0 0.001687 \n", "1 0.014774 \n", "2 0.016779 \n", "3 0.017742 \n", "4 0.015531 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1 \\\n", "0 1.814099e-11 \n", "1 2.716771e-13 \n", "2 8.870694e-16 \n", "3 1.019689e-15 \n", "4 1.783027e-12 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2 \\\n", "0 0.002681 \n", "1 0.005496 \n", "2 0.001688 \n", "3 0.017150 \n", "4 0.196227 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3 \\\n", "0 0.009853 \n", "1 0.022347 \n", "2 0.071343 \n", "3 0.052085 \n", "4 0.016471 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4 \\\n", "0 0.004227 \n", "1 0.003845 \n", "2 0.000560 \n", "3 0.002097 \n", "4 0.002690 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5 \\\n", "0 0.055716 \n", "1 0.084480 \n", "2 0.029823 \n", "3 0.052322 \n", "4 0.000040 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6 \\\n", "0 0.003005 \n", "1 0.000096 \n", "2 0.000032 \n", "3 0.002627 \n", "4 0.001384 \n", "\n", " ... \\\n", "0 ... \n", "1 ... \n", "2 ... \n", "3 ... \n", "4 ... 
\n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23 \\\n", "0 0.003351 \n", "1 0.010309 \n", "2 0.018767 \n", "3 0.001580 \n", "4 0.013445 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24 \\\n", "0 0.013561 \n", "1 0.001055 \n", "2 0.022292 \n", "3 0.145462 \n", "4 0.003754 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25 \\\n", "0 0.002040 \n", "1 0.001062 \n", "2 0.077598 \n", "3 0.000637 \n", "4 0.220090 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26 \\\n", "0 0.001682 \n", "1 0.006205 \n", "2 0.033979 \n", "3 0.000337 \n", "4 0.081232 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27 \\\n", "0 4.412969e-04 \n", "1 9.439933e-07 \n", "2 8.196229e-05 \n", "3 3.909138e-04 \n", "4 7.920414e-05 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28 \\\n", "0 6.086852e-17 \n", "1 5.250679e-18 \n", "2 3.315851e-11 \n", "3 1.304484e-21 \n", "4 2.406181e-13 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29 \\\n", "0 0.001606 \n", "1 0.001204 \n", "2 0.007313 \n", "3 0.011515 \n", "4 0.150817 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30 \\\n", "0 0.001379 \n", "1 0.000150 \n", "2 0.002565 \n", "3 0.000922 \n", "4 0.014913 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31 \\\n", "0 0.014635 \n", "1 0.015252 \n", "2 0.118167 \n", "3 0.029867 \n", "4 0.071632 \n", "\n", " tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32 \n", "0 0.000032 \n", "1 0.000779 \n", "2 0.001603 \n", "3 0.000001 \n", "4 0.000142 \n", "\n", "[5 rows x 135 columns]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_bias_df.head()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": 
"text", "id": "8m8QI4qEjtcY" }, "source": [ "# Part 4: Run evaluation metrics" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "PhwSHsMtO9fF" }, "source": [ "## Performance metrics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data Format" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "At this point, our performance data is in the DataFrame `test_performance_df`, with columns:\n", "\n", "- label: Integer id of the true class of the example (class ids run from 0 to 32 here).\n", "- < model name >_< class >: One column per model and class, cells contain the score that model assigns to that class.\n", "\n", "You can run the analysis below on any data in this format. Subgroup labels can be generated via words in the text as done above, or come from human labels if you have them." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Run AUC" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "XUZYCq-6N8MK" }, "outputs": [], "source": [ "import sklearn.metrics as metrics" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "25 3295\n", "3 890\n", "22 661\n", "12 542\n", "26 507\n", "23 494\n", "17 481\n", "31 427\n", "30 343\n", "7 268\n", "2 265\n", "18 209\n", "16 202\n", "24 197\n", "29 194\n", "10 185\n", "6 156\n", "0 141\n", "8 102\n", "5 87\n", "20 67\n", "4 58\n", "32 50\n", "19 41\n", "9 39\n", "11 37\n", "27 32\n", "21 30\n", "Name: label, dtype: int64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_performance_df.label.value_counts()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", "5 False\n", "6 False\n", "7 True\n", "8 False\n", "9 False\n", "10 False\n", "11 False\n", "12 False\n", "13 False\n", "14 False\n", "15 False\n", "16 False\n", "17 
False\n", "18 False\n", "19 True\n", "20 False\n", "21 False\n", "22 False\n", "23 False\n", "24 False\n", "25 False\n", "26 False\n", "27 False\n", "28 True\n", "29 False\n", " ... \n", "9970 False\n", "9971 False\n", "9972 False\n", "9973 False\n", "9974 True\n", "9975 False\n", "9976 False\n", "9977 False\n", "9978 False\n", "9979 False\n", "9980 False\n", "9981 False\n", "9982 False\n", "9983 False\n", "9984 False\n", "9985 False\n", "9986 False\n", "9987 False\n", "9988 False\n", "9989 False\n", "9990 False\n", "9991 False\n", "9992 False\n", "9993 False\n", "9994 False\n", "9995 False\n", "9996 False\n", "9997 False\n", "9998 False\n", "9999 False\n", "Name: label, Length: 10000, dtype: bool" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_performance_df['label'] == 3" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0.009853\n", "1 0.022347\n", "2 0.071343\n", "3 0.052085\n", "4 0.016471\n", "5 0.101164\n", "6 0.011855\n", "7 0.001939\n", "8 0.577954\n", "9 0.128116\n", "10 0.014246\n", "11 0.022629\n", "12 0.050127\n", "13 0.205395\n", "14 0.038603\n", "15 0.045960\n", "16 0.652514\n", "17 0.099024\n", "18 0.055800\n", "19 0.167238\n", "20 0.056128\n", "21 0.073346\n", "22 0.040896\n", "23 0.046719\n", "24 0.066602\n", "25 0.015700\n", "26 0.018788\n", "27 0.099245\n", "28 0.744404\n", "29 0.054567\n", " ... 
\n", "9970 0.025056\n", "9971 0.032513\n", "9972 0.059166\n", "9973 0.030145\n", "9974 0.146219\n", "9975 0.132243\n", "9976 0.061952\n", "9977 0.497093\n", "9978 0.154263\n", "9979 0.033800\n", "9980 0.041427\n", "9981 0.000079\n", "9982 0.071002\n", "9983 0.961150\n", "9984 0.017224\n", "9985 0.113003\n", "9986 0.040686\n", "9987 0.729384\n", "9988 0.025192\n", "9989 0.066657\n", "9990 0.025502\n", "9991 0.011763\n", "9992 0.007214\n", "9993 0.004737\n", "9994 0.044174\n", "9995 0.125944\n", "9996 0.199613\n", "9997 0.018891\n", "9998 0.218019\n", "9999 0.052486\n", "Name: tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3, Length: 10000, dtype: float64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "_model = 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738'\n", "_class = 3\n", "test_performance_df['{}_{}'.format(_model, _class)]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 35 }, "colab_type": "code", "executionInfo": { "elapsed": 32, "status": "ok", "timestamp": 1530641399913, "user": { "displayName": "Flavien Prost", "photoUrl": "//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg", "userId": "100080410554240838905" }, "user_tz": 240 }, "id": "yc8SWZbqMwA4", "outputId": "6e9399b8-ce22-42bb-c318-959bae73f6c0", "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.472880379306\n", "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n", "Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.494346987625\n", "Auc for class 3 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.5094779166\n", "Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.579115768006\n", "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.495869234756\n", "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.468048349118\n", "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.485770898896\n", "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.491489665173\n", "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.47350564638\n", "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.488175572414\n", "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.52613046651\n", "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.496119960142\n", "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n", "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n", "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n", "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.520060671101\n", "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.502598042781\n", "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.471809136308\n", "Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.589720292223\n", "Auc for class 20 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.464268809982\n", "Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.451838849883\n", "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.501252940388\n", "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.522887952293\n", "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.50126994171\n", "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.502592883032\n", "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.4976489476\n", "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.413984124197\n", "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\n", "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.485232058639\n", "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.475149523707\n", "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.512695371032\n", "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.443107537688\n", "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.473124962683\n", "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n", "Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.502436065161\n", "Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.497505395972\n", "Auc for class 4 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.533997183665\n", "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.516225645878\n", "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.479381557424\n", "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.503250547509\n", "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.501472866374\n", "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.511796004417\n", "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.472370750781\n", "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.503774777488\n", "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.482292660736\n", "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n", "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n", "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n", "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.509781244505\n", "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.511501561927\n", "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.466850476392\n", "Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.616544907291\n", "Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.517680398972\n", "Auc for class 21 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.48543965229\n", "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498092928991\n", "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.527383088967\n", "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.516476102053\n", "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498915515\n", "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498317975812\n", "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.439794843499\n", "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\n", "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.509969175195\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/sklearn/metrics/ranking.py:571: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless\n", " UndefinedMetricWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.493638808206\n", "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.508299713945\n", "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.457780904523\n", "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.496740926496\n", "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n", "Auc for class 2 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499153608357\n", "Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499355443456\n", "Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.519405656255\n", "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.510566062676\n", "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.480932677982\n", "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.492101760004\n", "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.521062880598\n", "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.46758254629\n", "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.475540747064\n", "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.540092938467\n", "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.486065994621\n", "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n", "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n", "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n", "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.488949553253\n", "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.512517147563\n", "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.480352770023\n", "Auc for class 19 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.524139214683\n", "Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.531170784555\n", "Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.486539618857\n", "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.493480481944\n", "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.493649014345\n", "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.519584546531\n", "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.502616827295\n", "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499241317853\n", "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.527983296549\n", "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\n", "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.513514238074\n", "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.505267708646\n", "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.526942603747\n", "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.416369849246\n", "Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.492310370551\n", "Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n", "Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.510422808191\n", "Auc for class 3 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.497258969647\n", "Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.533468253803\n", "Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.51988275004\n", "Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.446890074912\n", "Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.470106311844\n", "Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.480683362454\n", "Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.519891680117\n", "Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.498969861354\n", "Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.49575049304\n", "Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.496308597575\n", "Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n", "Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n", "Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n", "Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.497497468669\n", "Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.498361194233\n", "Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.454219503411\n", "Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.554294558911\n", "Auc for class 20 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.510198929845\n", "Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.443848211301\n", "Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.511251516464\n", "Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.527593056506\n", "Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.517610635095\n", "Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.507171714086\n", "Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.494850664384\n", "Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.433402513042\n", "Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\n", "Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.529500137723\n", "Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.485269677036\n", "Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.513662670014\n", "Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.429722613065\n" ] } ], "source": [ "auc_list = []\n", "for _model in MODEL_NAMES:\n", " for _class in CLASS_NAMES:\n", " fpr, tpr, thresholds = metrics.roc_curve(\n", " test_performance_df['label'] == _class,\n", " test_performance_df['{}_{}'.format(_model, _class)])\n", " _auc = metrics.auc(fpr, tpr)\n", " auc_list.append(_auc)\n", " print ('Auc for class {} model {}: {}'.format(_class, _model, _auc))" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "def 
get_class_from_col_name(col_name):\n", " pattern = r'^.*_(\\d+)$'\n", " return int(re.search(pattern, col_name).group(1))" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "def find_best_class(df, model_name, class_names):\n", " model_class_names = ['{}_{}'.format(model_name, class_name) for class_name in class_names]\n", " sub_df = df[model_class_names]\n", " df['{}_class'.format(model_name)] = sub_df.idxmax(axis=1).apply(get_class_from_col_name)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "for _model in MODEL_NAMES:\n", " find_best_class(test_performance_df, _model, CLASS_NAMES)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.0572\n", "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.0639\n", "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.0681\n", "Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.0623\n" ] } ], "source": [ "accuracy_list = []\n", "for _model in MODEL_NAMES:\n", " is_correct = (test_performance_df['{}_class'.format(_model)] == test_performance_df['label'])\n", " _acc = sum(is_correct)/len(is_correct)\n", " accuracy_list.append(_acc)\n", " print ('Accuracy for model {}: {}'.format(_model, _acc))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "vTrKsfIcxoBh" }, "source": [ "## Unintended Bias Metrics" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "D3ZJSKY8FHFH" }, "source": [ "### Data Format\n", "At this point, our bias data is in DataFrame df, with columns:\n", "\n", "* label: True if the comment is Toxic, False otherwise.\n", "* < model name >: One column per model, cells contain the 
score from that model.\n", "* < subgroup >: One column per identity, True if the comment mentions this identity.\n", "\n", "You can run the analysis below on any data in this format. Subgroup labels can be \n", "generated via words in the text as done above, or come from human labels if you have them.\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "scrolled": false }, "outputs": [ { "ename": "KeyError", "evalue": "'male'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0midentity_terms_civil_included\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_term\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minput_fn_example\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midentity_terms_civil\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_bias_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0m_term\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'keeping {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_term\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0midentity_terms_civil_included\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_term\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 
"\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2137\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2138\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2139\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2141\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_getitem_column\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2144\u001b[0m \u001b[0;31m# get column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2145\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2146\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2148\u001b[0m \u001b[0;31m# duplicate 
columns & possible reduce dimensionality\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/generic.pyc\u001b[0m in \u001b[0;36m_get_item_cache\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 1840\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1841\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1842\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1843\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1844\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/internals.pyc\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, item, fastpath)\u001b[0m\n\u001b[1;32m 3841\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3842\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3843\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3844\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3845\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/indexes/base.pyc\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2525\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2526\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2527\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2529\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: 'male'" ] } ], "source": [ "identity_terms_civil_included = []\n", "for _term in input_fn_example.identity_terms_civil:\n", " if sum(test_bias_df[_term]) >= 20:\n", " print ('keeping {}'.format(_term))\n", " identity_terms_civil_included.append(_term)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_bias_df['model_1'] = test_bias_df['tf_gru_attention_civil:v_20181109_164318']\n", "test_bias_df['model_2'] = 
test_bias_df['tf_gru_attention_civil:v_20181109_164403']\n", "test_bias_df['model_3'] = test_bias_df['tf_gru_attention_civil:v_20181109_164535']\n", "test_bias_df['model_4'] = test_bias_df['tf_gru_attention_civil:v_20181109_164630']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MODEL_NAMES = ['model_1', 'model_2', 'model_3', 'model_4']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bias_metrics = model_bias_analysis.compute_bias_metrics_for_models(test_bias_df, identity_terms_civil_included, MODEL_NAMES, 'label')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model_bias_analysis.plot_auc_heatmap(bias_metrics, MODEL_NAMES)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model_bias_analysis.plot_aeg_heatmap(bias_metrics, MODEL_NAMES)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "collapsed_sections": [], "default_view": {}, "name": "jigsaw-evaluation-pipeline.ipynb", "provenance": [], "version": "0.3.2", "views": {} }, "kernelspec": { "display_name": "models_eval", "language": "python", "name": "models_eval" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 1 } ================================================ FILE: model_evaluation/requirements.txt ================================================ google-api-python-client==1.7.3 Markdown==2.6.11 nltk==3.9 numpy==1.22.0 pandas==0.22.0 requests==2.32.2 seaborn==0.8.1 scikit-learn==0.19.1 scipy==1.10.0 sklearn==0.0 six==1.11.0 tensorflow==2.12.1 jupyter==1.0.0 matplotlib==2.0.2 nltk==3.9 
#!/bin/bash
#
# Scores the 'biasbios' test set with the deployed models listed in
# MODEL_NAMES by running score_test_data.py, which writes the scored data
# as a csv to OUTPUT_PATH.

# Comma-separated list of $MODEL_NAME:$VERSION entries.
MODEL_NAMES='tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113247,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113241,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113114,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113106,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_163707,'\
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_163723'
CLASS_NAMES='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32'
TEST_DATA='biasbios'
OUTPUT_PATH='gs://conversationai-models/biosbias/scored_data/standard_test.csv'

# Print the command that is about to run.
echo """
Running...
python score_test_data.py \\
    --model_names=$MODEL_NAMES \\
    --class_names=$CLASS_NAMES \\
    --test_data=$TEST_DATA \\
    --output_path=$OUTPUT_PATH
"""

# Expansions are quoted so that a value containing whitespace or glob
# characters is still passed as a single argument.
python score_test_data.py \
    --model_names="$MODEL_NAMES" \
    --class_names="$CLASS_NAMES" \
    --test_data="$TEST_DATA" \
    --output_path="$OUTPUT_PATH"
python score_test_data.py \\ --model_names=$MODEL_NAMES \\ --class_names=$CLASS_NAMES \\ --test_data=$TEST_DATA \\ --output_path=$OUTPUT_PATH """ python score_test_data.py \ --model_names=$MODEL_NAMES \ --class_names=$CLASS_NAMES \ --test_data=$TEST_DATA \ --output_path=$OUTPUT_PATH ================================================ FILE: model_evaluation/score_test_data.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Convenience script to score some data with CMLE models.""" import getpass import nltk import os import pandas as pd import random import tensorflow as tf import input_fn_example from utils_export.dataset import Dataset, Model from utils_export import utils_cloudml from utils_export import utils_tfrecords tf.app.flags.DEFINE_string( 'model_names', None, 'Comma separated list of model names deployed on ML Engine.') tf.app.flags.DEFINE_string( 'class_names', None, 'Comma separated list of class names to evaluate.') tf.app.flags.DEFINE_string('test_data', None, 'Test data to evaluate on. 
def get_input_fn(test_data, tokenizer, model_input_comment_field):
  """Returns the input_fn used to load the requested test dataset.

  Args:
    test_data: name of the dataset, either 'biasbios' or 'scrubbed_biasbios'.
    tokenizer: tokenizing function, forwarded unchanged to
      `input_fn_example.create_input_fn_biasbios`.
    model_input_comment_field: name of the text feature, forwarded unchanged
      to `input_fn_example.create_input_fn_biasbios`.

  Returns:
    The value returned by `input_fn_example.create_input_fn_biasbios`.

  Raises:
    ValueError: if `test_data` is not one of the supported dataset names.
  """
  if test_data == 'biasbios':
    return input_fn_example.create_input_fn_biasbios(
        tokenizer, model_input_comment_field)
  if test_data == 'scrubbed_biasbios':
    return input_fn_example.create_input_fn_biasbios(
        tokenizer, model_input_comment_field, scrubbed=True)
  raise ValueError('Dataset not currently supported.')


def tokenizer(text, lowercase=True):
  """Converts text to a list of words.

  Args:
    text: piece of text to tokenize. Either a UTF-8 encoded byte string or
      an already decoded (unicode) string.
    lowercase: whether to include lowercasing in preprocessing (boolean).

  Returns:
    A list of strings (words).
  """
  # Only byte strings are decoded: a Python 3 `str` has no `.decode`, and
  # calling `.decode` on a Python 2 `unicode` object first encodes it
  # implicitly as ASCII, which fails on non-ASCII text.
  if isinstance(text, bytes):
    text = text.decode('utf-8')
  words = nltk.word_tokenize(text)
  if lowercase:
    words = [w.lower() for w in words]
  return words
def score_data(model_names,
               class_names,
               test_data,
               output_path,
               project_name,
               text_feature_name,
               sentence_key,
               prediction_name,
               dataset_size):
  """Scores a test dataset with ML engine models and writes output as csv.

  Args:
    model_names: list of model names deployed on ML Engine.
    class_names: list of class names to evaluate.
    test_data: test data to evaluate on, must be defined in get_input_fn.
    output_path: path to write scored test data.
    project_name: name of Google Cloud project.
    text_feature_name: name of the text feature (see serving function call in
      run.py).
    sentence_key: name of input key (see serving function call in run.py).
    prediction_name: name of output prediction.
    dataset_size: maximum size of dataset to score.
  """
  # Faster to access GCS file, see
  # https://github.com/tensorflow/tensorflow/issues/15530
  os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'
  nltk.download('punkt')

  # Load data.
  input_fn = get_input_fn(
      test_data, tokenizer, model_input_comment_field=text_feature_name)
  performance_dataset_dir = os.path.join(
      'gs://conversationai-models/',
      getpass.getuser(),
      'tfrecords',
      'performance_dataset_dir_3')
  dataset = Dataset(input_fn, performance_dataset_dir)
  # Need to set seed before loading data to be able to reload same data in
  # the future.
  random.seed(2018)

  # Define and call model.
  # The library will use this spec automatically.
  model_input_spec = {
      text_feature_name: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
  }
  dataset.load_data(dataset_size, random_filter_keep_rate=0.5)
  model = Model(
      feature_keys_spec=model_input_spec,
      prediction_keys=prediction_name,
      example_key=sentence_key,
      model_names=model_names,
      project_name=project_name)
  dataset.add_model_prediction_to_data(
      model, recompute_predictions=True, class_names=class_names)

  # Save data.
  scored_test_df = dataset.show_data()
  # Close the handle explicitly so the csv is flushed to `output_path` before
  # the function returns, instead of relying on garbage collection.
  with tf.gfile.Open(output_path, 'w') as output_file:
    scored_test_df.to_csv(output_file, index=False)


if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  model_names = [name.strip() for name in FLAGS.model_names.split(',')]
  print(model_names)
  class_names = [name.strip() for name in FLAGS.class_names.split(',')]
  print(class_names)
  score_data(model_names,
             class_names,
             FLAGS.test_data,
             FLAGS.output_path,
             FLAGS.project_name,
             FLAGS.text_feature_name,
             FLAGS.sentence_key,
             FLAGS.prediction_name,
             FLAGS.dataset_size)
# Quota for concurrent prediction jobs
CMLE_QUOTA_PREDICTION = 7


class Model(object):
  """Defines the spec of a CMLE Model.

  All models (given by `model_names`) need to share the feature_keys_spec,
  example_key and prediction_keys. Those fields define the inputs
  (feature_keys_spec, example_key) and output of the models.
  """

  def __init__(self,
               feature_keys_spec,
               prediction_keys,
               model_names,
               project_name,
               example_key='example_key'):
    """Initializes a model and defines its signature.

    Args:
      feature_keys_spec: spec of the tf_records input to the model.
      prediction_keys: Name of the keys to extract from model outputs.
      model_names: List of names of the model in Cloud MLE. Format should be
        $MODEL_NAME:$VERSION. If no version given, will take default version.
      project_name: name of the gcp project.
      example_key: name of the example key expected by the model.

    Raises:
      ValueError: If example_key is included in the feature_spec or if
        feature_keys_spec does not match required format.

    Note: When used with `Dataset`, the dataframe returned by the input_fn
      should not contain the `example_key`, as it will be later created by
      the API.
    """
    utils_tfrecords.is_valid_spec(feature_keys_spec)
    if example_key in feature_keys_spec:
      # The trailing space keeps the two sentences apart once the adjacent
      # literals are concatenated.
      raise ValueError('example_key should not be part of input_data. '
                       'It will be created when writing to tf-records')
    self._model_name = model_names
    self._feature_keys_spec = feature_keys_spec
    self._prediction_keys = prediction_keys
    self._project_name = project_name
    self._example_key = example_key

  def feature_keys_spec(self):
    """Returns the spec of the tf-records input given at construction."""
    return self._feature_keys_spec

  def example_key(self):
    """Returns the name of the example key given at construction."""
    return self._example_key

  def model_names(self):
    """Returns the `model_names` value given at construction."""
    return self._model_name

  def prediction_keys(self):
    """Returns the `prediction_keys` value given at construction."""
    return self._prediction_keys

  def project_name(self):
    """Returns the name of the gcp project given at construction."""
    return self._project_name

  def set_job_ids_prediction(self, job_ids):
    """Stores the ids of the prediction jobs started for this model."""
    self._job_ids_prediction = job_ids

  def job_ids_prediction(self):
    """Returns the job ids stored by `set_job_ids_prediction`.

    Raises:
      AttributeError: if `set_job_ids_prediction` has not been called yet.
    """
    return self._job_ids_prediction
class Dataset(object):
  """Defines a format for every dataset to work with evaluation pipeline.

  Usage:
    input_fn = ... (returns pandas DataFrame).
    dataset = Dataset(input_fn, dataset_dir)  # Verifies that input_fn is ok.
    dataset.load_data(10000)
    model = Model(...)
    # Next function verifies that models are compatible.
    dataset.add_model_prediction_to_data(model)
    dataset.show_data()
  """

  def __init__(self, input_fn, dataset_dir):
    """Initialises a `Dataset` instance.

    Args:
      input_fn: function that returns a pandas `Dataframe`.
      dataset_dir: Directory where to save the temporary files, in particular
        tf_records inputs and outputs of CMLE.

    Raises:
      ValueError: if `input_fn` does not pass `check_input_fn`.
    """
    self.check_input_fn(input_fn)
    self._input_fn = input_fn
    self._dataset_dir = dataset_dir

  def show_data(self):
    """Returns the loaded dataframe.

    Raises:
      ValueError: if `load_data` has not been called yet.
    """
    if not hasattr(self, 'data'):
      raise ValueError('Dataset does not have data yet.'
                       ' You need to run `load_data` first.')
    return self.data

  def check_input_fn(self, input_fn):
    """Checks if the input_fn meets requirements.

    The function must accept a `max_n_examples` argument and
    `input_fn(max_n_examples=1)` must return a one-row pandas DataFrame.

    Raises:
      ValueError: if one of the requirements above is not met.
    """
    # `inspect.getargspec` was removed in Python 3.11; it is only used as a
    # fallback where `getfullargspec` does not exist (Python 2).
    get_spec = getattr(inspect, 'getfullargspec', None)
    if get_spec is None:
      get_spec = inspect.getargspec
    spec = get_spec(input_fn)
    # input_fn is always called with `max_n_examples` as a keyword, so a
    # keyword-only parameter of that name is acceptable as well.
    args_input_fn = list(spec.args) + list(
        getattr(spec, 'kwonlyargs', None) or [])
    if 'max_n_examples' not in args_input_fn:
      raise ValueError('input_fn should have (at least) `max_n_examples`'
                       ' as arguments.')
    loaded_data = input_fn(max_n_examples=1)
    if not isinstance(loaded_data, pd.DataFrame):
      raise ValueError('input_fn should return a pandas DataFrame.')
    if len(loaded_data) != 1:
      raise ValueError(
          'input_fn(max_n_examples=1) should contain 1 row (exactly).')
    logging.info('input_fn is compatible with the `Dataset` class.')

  def check_compatibility(self, model):
    """Checks that input_fn is compatible with the model.

    Raises:
      ValueError: if a key of `model.feature_keys_spec()` is not a column of
        the data.
    """
    # Use the loaded data when available, otherwise probe input_fn with a
    # single example.
    if hasattr(self, 'data'):
      test_df = self.data
    else:
      test_df = self._input_fn(max_n_examples=1)
    for key in model.feature_keys_spec():
      if key not in test_df.columns:
        raise ValueError(
            'input_fn must contain at least the feature keys {}'.format(
                model.feature_keys_spec()))
    logging.info('Model is compatible with the `Dataset` instance.')

  def load_data(self, max_n_examples, **kwargs):
    """Calls input_fn and stores the returned dataframe in `self.data`.

    Args:
      max_n_examples: forwarded to input_fn as `max_n_examples`.
      **kwargs: extra keyword arguments forwarded to input_fn.
    """
    self.data = self._input_fn(max_n_examples=max_n_examples, **kwargs)

  def get_path_input_tf(self):
    """Returns the path to input tf-records (input of CMLE)."""
    return os.path.join(self._dataset_dir, 'input_data.tfrecords')

  def get_path_prediction(self, model_name):
    """Returns the path to prediction files (output of CMLE)."""
    name = 'prediction_data_{}'.format(model_name)
    return os.path.join(self._dataset_dir, name)

  def convert_data_to_tf(self, feature_keys_spec, example_key, overwrite=True):
    """Writes self.data to tf-records.

    Args:
      feature_keys_spec: the spec of the feature_keys. Only those fields will
        be written to tf-records.
      example_key: Name of the field for example_key. The key will be
        generated on the fly.
      overwrite: Whether to overwrite the existing tf_records.

    Raises:
      ValueError: if dataset does not have data loaded.
    """
    if not hasattr(self, 'data'):
      raise ValueError('Dataset does not have data yet.'
                       ' You need to run `load_data` first.')
    path_input_tf = self.get_path_input_tf()
    if tf.gfile.Exists(path_input_tf):
      if not overwrite:
        logging.info('TF-Records already exist - We will use those.')
        return
      logging.info('TF-Records already exist - overwriting them.')
    utils_tfrecords.encode_pandas_to_tfrecords(
        self.data, feature_keys_spec, path_input_tf, example_key)

  def call_prediction(self, model):
    """Starts a CMLE batch prediction job for each model name of the model.

    The ids of the started jobs are stored on the model with
    `model.set_job_ids_prediction`.

    Raises:
      ValueError: if the input tf-records do not exist yet, or if the model
        has more than CMLE_QUOTA_PREDICTION model names.
    """
    path_input_tf = self.get_path_input_tf()
    if not tf.gfile.Exists(path_input_tf):
      raise ValueError('Dataset does not have input_tf_records yet.'
                       ' You need to run `convert_data_to_tf` first.')
    if len(model.model_names()) > CMLE_QUOTA_PREDICTION:
      raise ValueError('Model should not contain more than {} versions.'
                       ' If you need more, split the version into two'
                       ' different models.'.format(CMLE_QUOTA_PREDICTION))
    job_ids = []
    for model_name_full in model.model_names():
      # Names have the form $MODEL_NAME:$VERSION; the version is optional.
      model_name_split = model_name_full.split(':')
      model_name = model_name_split[0]
      version = model_name_split[1] if len(model_name_split) > 1 else None
      output_pred_path = self.get_path_prediction(model_name_full)
      job_id = utils_cloudml.call_model_predictions_from_df(
          project_name=model.project_name(),
          input_tf_records=path_input_tf,
          output_prediction_path=output_pred_path,
          model_name=model_name,
          version_name=version)
      job_ids.append(job_id)
    model.set_job_ids_prediction(job_ids)

  def collect_prediction(self, model, class_names):
    """Collects the predictions of CMLE jobs and adds it to dataframe."""
    for model_name in model.model_names():
      tf_record_prediction = self.get_path_prediction(model_name)
      self.data = utils_cloudml.add_model_predictions_to_df(
          self.data,
          prediction_file=tf_record_prediction,
          model_col_name=model_name,
          prediction_name=model.prediction_keys(),
          example_key=model.example_key(),
          class_names=class_names)

  def wait_predictions(self, model):
    """Loops until the prediction jobs of the model completed.

    Raises:
      ValueError: if no prediction job ids have been set on the model.
    """
    # `hasattr(model, 'job_ids_prediction')` cannot detect a missing job list
    # because `job_ids_prediction` is a method and therefore always present;
    # calling it is what fails when no job ids were stored.
    try:
      job_ids = model.job_ids_prediction()
    except AttributeError:
      raise ValueError(
          'Model does not have any `job_ids_prediction`.'
          ' You need to run `call_prediction` for CMLE batch prediction job.')
    for job_id in job_ids:
      utils_cloudml.check_job_over(model.project_name(), job_id)

  def _compute_predictions_less_than_quota(self,
                                           model,
                                           need_to_convert_data=True):
    """Runs predictions for a model that has less than $QUOTA versions."""
    if need_to_convert_data:
      self.convert_data_to_tf(model.feature_keys_spec(), model.example_key())
    self.call_prediction(model)
    self.wait_predictions(model)

  def add_model_prediction_to_data(self,
                                   model,
                                   recompute_predictions=True,
                                   class_names=None):
    """Computes the prediction of the model and adds it to dataframe.

    Args:
      model: a `Model` instance.
      recompute_predictions: Indicates if we run predictions (batch
        prediction job) or if we load past prediction files. If use past
        predictions (when False), the data must match exactly (same number of
        lines and in same order).
      class_names (optional): If the model is a multiclass model, you can
        specify class names. The model will then return a logit value per
        class instead of a single value.
    """
    self.check_compatibility(model)
    if recompute_predictions:
      model_names = model.model_names()
      # Ceiling division: an exact multiple of the quota must not produce a
      # trailing empty batch.
      num_batches = ((len(model_names) + CMLE_QUOTA_PREDICTION - 1)
                     // CMLE_QUOTA_PREDICTION)
      for batch_index in range(num_batches):
        logging.info('Doing batch {}/{}'.format(batch_index + 1, num_batches))
        min_index = batch_index * CMLE_QUOTA_PREDICTION
        max_index = min_index + CMLE_QUOTA_PREDICTION
        sub_model = Model(
            model.feature_keys_spec(),
            model.prediction_keys(),
            model_names[min_index:max_index],
            model.project_name(),
            model.example_key())
        # The tf-records only need to be written once, for the first batch.
        self._compute_predictions_less_than_quota(
            sub_model, need_to_convert_data=(batch_index == 0))
    else:
      logging.warning(
          'Using past predictions. '
          'the data must match exactly (same number of lines and same order).')
    self.collect_prediction(model, class_names)
"""Tests for dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import getpass
import os
import time
import unittest

from dataset import Dataset
from dataset import Model
import pandas as pd
from utils_tfrecords import EncodingFeatureSpec


class TestCompatibleInputFn(unittest.TestCase):
  """Verifies the compatibility of input_fn with `Dataset`."""

  def testCorrect(self):
    """A valid input_fn must be accepted by the `Dataset` constructor."""

    def input_fn(max_n_examples):
      return pd.DataFrame({
          'comment_text': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      })

    try:
      Dataset(input_fn, 'dataset_dir')
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')

  def testWrongArgInputFn(self):
    """An input_fn without a `max_n_examples` argument must be rejected."""

    def input_fn(other_args=1.0):
      assert other_args
      return {'other_feature': ['This is one'], 'label_name': [0]}

    with self.assertRaises(Exception) as context:
      Dataset(input_fn, 'dataset_dir')
    self.assertIn('input_fn should have (at least) `max_n_examples`',
                  str(context.exception))

  def testInputFnWrongType(self):
    """An input_fn returning a dict instead of a DataFrame must be rejected."""

    def input_fn(max_n_examples):
      return {
          'other_feature': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      }

    with self.assertRaises(Exception) as context:
      Dataset(input_fn, 'dataset_dir')
    self.assertIn('input_fn should return a pandas DataFrame.',
                  str(context.exception))

  def testWrongNumberOfLines(self):
    """An input_fn that ignores `max_n_examples` must be rejected."""

    def input_fn(max_n_examples=1):
      assert max_n_examples
      # Always two rows, whatever `max_n_examples` is.
      return pd.DataFrame({
          'comment_text': ['This is one'] * 2,
          'label_name': [0] * 2
      })

    with self.assertRaises(Exception) as context:
      Dataset(input_fn, 'dataset_dir')
    self.assertIn(
        'input_fn(max_n_examples=1) should contain 1 row (exactly).',
        str(context.exception))


class TestModelCompatibleWithInputFn(unittest.TestCase):
  """Verifies the compatibility between input_fn and model."""

  def testBadTypeFeatureKeys(self):
    """A feature_keys_spec that is not a dictionary must be rejected."""
    with self.assertRaises(Exception) as context:
      model = Model(
          feature_keys_spec='comment_text',
          prediction_keys='prediction_key',
          model_names='None',
          project_name=None)
    self.assertIn('Spec should be a dictionary', str(context.exception))

  def testInputFnMissingFeatureKeys(self):
    """A dataset lacking one of the model feature keys is incompatible."""
    model = Model(
        feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},
        prediction_keys='prediction_key',
        model_names='None',
        project_name=None)

    def input_fn(max_n_examples):
      return pd.DataFrame({
          'other_feature': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      })

    with self.assertRaises(Exception) as context:
      dataset = Dataset(input_fn, 'dataset_dir')
      dataset.check_compatibility(model)
    self.assertIn('input_fn must contain at least the feature keys',
                  str(context.exception))

  def testModelIsCompatibleWithDataset(self):
    """A dataset containing the model feature keys is compatible."""
    model = Model(
        feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},
        prediction_keys='prediction_key',
        model_names='None',
        project_name=None)

    def input_fn(max_n_examples):
      return pd.DataFrame({
          'comment_text': ['This is one'] * max_n_examples,
          'label_name': [0] * max_n_examples
      })

    try:
      dataset = Dataset(input_fn, 'dataset_dir')
      dataset.check_compatibility(model)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')


class TestEndPipeline(unittest.TestCase):
  """Verifies end-to-end use of dataset."""

  # Timestamp computed once at class definition and used as the last
  # component of the GCS test path.
  test_version = str(int(time.time()))

  def setUp(self):
    """Builds a 5-row dataset under a GCS path and a two-version model."""

    def input_fn_test(max_n_examples):
      return pd.DataFrame(
          {'comment_text': [['This', 'is', 'one']] * max_n_examples})

    gcs_path_test = os.path.join('gs://kaggle-model-experiments/',
                                 getpass.getuser(), 'unittest', 'dataset_test',
                                 TestEndPipeline.test_version)
    self.dataset = Dataset(input_fn_test, gcs_path_test)
    self.dataset.load_data(5)
    model_input_spec = {
        'comment_text': EncodingFeatureSpec.LIST_STRING,
    }
    self.model = Model(
        feature_keys_spec=model_input_spec,
        prediction_keys='frac_neg/logistic',
        example_key='comment_key',
        model_names=[
            'tf_gru_attention:v_20180914_163804',
            'tf_gru_attention:v_20180823_133625'
        ],
        project_name='wikidetox')

  def testComputePredictions(self):
    """Adding predictions with the default arguments must not raise."""
    try:
      self.dataset.add_model_prediction_to_data(self.model)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')

  def testLoadPredictions(self):
    """Adding predictions with recompute_predictions=False must not raise."""
    try:
      self.dataset.add_model_prediction_to_data(
          self.model, recompute_predictions=False)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')


if __name__ == '__main__':
  unittest.main()
# Maximum number of version that can be created concurrently.
CLOUD_ML_VERSION_CREATE_QUOTA = 10


def get_list_models_to_export(parent_model_dir):
  """Gets the paths of all models that are in parent_model_dir.

  Args:
    parent_model_dir: directory that is walked recursively.

  Returns:
    The list of visited directories that contain a `saved_model.pb` file.
  """
  # The presence of `saved_model.pb` is the indicator of a saved model.
  return [
      subdirectory
      for subdirectory, _, files in tf.gfile.Walk(parent_model_dir)
      if 'saved_model.pb' in files
  ]


def check_model_exists(project_name, model_name):
  """Verifies if a model name is deployed already on CMLE.

  Args:
    project_name: name of the project.
    model_name: name of the model to look up.

  Returns:
    True if the `models.get` request succeeds, False if the API answers with
    an HTTP error.
  """
  ml = discovery.build('ml', 'v1')
  model_id = 'projects/{}/models/{}'.format(project_name, model_name)
  request = ml.projects().models().get(name=model_id)
  try:
    request.execute()
  except errors.HttpError:
    # Only API errors mean "not found". A bare `except:` would also swallow
    # KeyboardInterrupt and programming errors.
    return False
  return True


def create_model(project_name, model_name):
  """Creates a model on CMLE.

  Args:
    project_name: name of the project that will own the model.
    model_name: name of the model to create.

  Raises:
    ValueError: if the API answers with an HTTP error.
  """
  ml = discovery.build('ml', 'v1')
  request_dict = {'name': model_name}
  project_id = 'projects/{}'.format(project_name)
  request = ml.projects().models().create(parent=project_id, body=request_dict)
  try:
    request.execute()
  except errors.HttpError as err:
    raise ValueError('There was an error creating the model.' +
                     ' Check the details: {}'.format(err._get_reason()))
def create_version(project_name, model_name, version_name, model_dir):
  """Creates a version of a model on CMLE.

  Args:
    project_name: name of the project.
    model_name: name of the (already existing) model.
    version_name: name of the version to create.
    model_dir: location of the exported model, sent as `deploymentUri`.

  Returns:
    The `name` field of the API response; `check_version_deployed` polls it
    as an operation id.

  Raises:
    ValueError: if the API answers with an HTTP error.
  """
  ml = discovery.build('ml', 'v1')
  request_dict = {
      'name': version_name,
      'deploymentUri': model_dir,
      'runtimeVersion': '1.10'
  }
  model_id = 'projects/{}/models/{}'.format(project_name, model_name)
  request = ml.projects().models().versions().create(
      parent=model_id, body=request_dict)
  try:
    response = request.execute()
  except errors.HttpError as err:
    # The `{}` placeholder is required, otherwise the reason is dropped from
    # the message.
    raise ValueError('There was an error creating the version.' +
                     ' Check the details: {}'.format(err._get_reason()))
  return response['name']


def check_version_deployed(operation_id):
  """Loops until the version has been deployed on CMLE.

  Args:
    operation_id: name of the operation returned by `create_version`.

  Raises:
    ValueError: if the API answers with an HTTP error while polling.
  """
  ml = discovery.build('ml', 'v1')
  request = ml.projects().operations().get(name=operation_id)
  while True:
    time.sleep(0.3)
    try:
      response = request.execute()
    except errors.HttpError as err:
      raise ValueError('There was an error getting the operation.' +
                       ' Check the details: {}'.format(err._get_reason()))
    if response.get('done', False):
      break
  # NOTE(review): a finished long-running operation is expected to carry an
  # `error` field when it failed - confirm against the API reference. It is
  # only logged here so that waiting on the other versions still proceeds.
  if 'error' in response:
    logging.error('Operation {} finished with an error: {}'.format(
        operation_id, response['error']))


def deploy_model_version(project_name, model_name, version_name, model_dir):
  """Deploys one TF model on CMLE.

  Args:
    project_name: Name of a CMLE project.
    model_name: Name of the model to deploy. If it does not exist yet, the
      model will be created.
    version_name: Version of the model on CMLE.
    model_dir: Where to find the exported model.

  Returns:
    The operation id returned by `create_version`.
  """
  if not check_model_exists(project_name, model_name):
    create_model(project_name, model_name)
  return create_version(project_name, model_name, version_name, model_dir)


def _get_version_name(model_dir, go_up_3=True):
  """Looks for the version_name in the model_directory name.

  Example:
    model_dir = gs://.../20190328_103329/model_dir/102500/1553798665/
    If go_up_3, it will grab '20190328_103329'; if not, it will grab
    '1553798665'.

  Typically speaking, set up go_up_3=False if a model_run has several
  exported models.

  Args:
    model_dir: path of the exported model (with a trailing slash in the
      example above).
    go_up_3: whether to take the name three directories above the export
      directory instead of the export directory itself.

  Returns:
    The selected directory name prefixed with 'v_'.
  """
  # The first `dirname` only strips the trailing slash of `model_dir`.
  name = os.path.dirname(model_dir)
  if go_up_3:
    for _ in range(3):
      name = os.path.dirname(name)
  return 'v_{}'.format(os.path.basename(name))
def deploy_all_models(list_model_dir, project_name, model_name):
  """Finds and deploys all models present in a list of directories.

  Versions are created in groups of at most CLOUD_ML_VERSION_CREATE_QUOTA;
  each group is waited for before the next one is started.

  Args:
    list_model_dir: Directories to explore, either as a list/tuple of paths
      or as a single comma separated string.
    project_name: Name of the project.
    model_name: Name of the model. All the models found in the directories
      will be saved as versions of the same main model.
  """
  # Use the argument, not the module-level `args`, which only exists when
  # this file is run as a script.
  if isinstance(list_model_dir, (list, tuple)):
    model_dirs = list(list_model_dir)
  else:
    model_dirs = list_model_dir.split(',')

  models = []
  for _model_dir in model_dirs:
    models.extend(get_list_models_to_export(_model_dir))
  logging.info('Exploration finished: {} models detected.'.format(
      len(models)))

  for start in range(0, len(models), CLOUD_ML_VERSION_CREATE_QUOTA):
    operation_id_list = [
        deploy_model_version(
            project_name=project_name,
            model_name=model_name,
            version_name=_get_version_name(exported_model_dir),
            model_dir=exported_model_dir)
        for exported_model_dir in models[
            start:start + CLOUD_ML_VERSION_CREATE_QUOTA]
    ]
    logging.info('Waiting for versions to be deployed...')
    for operation_id in operation_id_list:
      check_version_deployed(operation_id)
  logging.info('DONE. {} models have been deployed'.format(len(models)))


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--list_model_dir',
      help='List of the model directory (comma separated).',
      required=True)
  parser.add_argument(
      '--project_name',
      help='Name of GCP project.',
      default='conversationai-models')
  parser.add_argument(
      '--model_name', help='Name of the model on CMLE.', default='tf_test')
  args = parser.parse_args(args=sys.argv[1:])
  tf.logging.set_verbosity(tf.logging.INFO)
  deploy_all_models(args.list_model_dir, args.project_name, args.model_name)
def call_model_predictions_from_df(project_name,
                                   input_tf_records,
                                   output_prediction_path,
                                   model_name,
                                   version_name=None):
  """Calls a prediction job.

  Args:
    project_name: gcp project name.
    input_tf_records: gcs path to input tf_records.
    output_prediction_path: gcs path to store tf_records, which will be
      outputs to batch prediction job.
    model_name: Model name used to run predictions.
      The model must take as inputs TF-Records with fields $TEXT_FEATURE_NAME
      and $SENTENCE_KEY, and should return a dictionary including the field
      $LABEL_NAME.
    version_name: Model version to run predictions.
      If None, it will use default version of the model.

  Returns:
    job_id: the job_id of the prediction job, or None if the job could not be
      created (see `_call_batch_job`).

  Raises:
    ValueError: if input_tf_records does not exist.
  """
  # The records are never created here; they must already exist.
  if not file_io.file_exists(input_tf_records):
    raise ValueError('tf_records do not exist.')

  # Call batch prediction job.
  return _call_batch_job(
      project_name,
      input_paths=input_tf_records,
      output_path=output_prediction_path,
      model_name=model_name,
      version_name=version_name)


def _call_batch_job(project_name,
                    input_paths,
                    output_path,
                    model_name,
                    version_name=None):
  """Calls a batch prediction job on Cloud MLE.

  Returns:
    The `jobId` of the created job, or None if the create request failed with
    an HTTP error (the error is logged, not raised).
  """
  batch_predict_body = _make_batch_job_body(
      project_name, input_paths, output_path, model_name,
      version_name=version_name)

  project_id = 'projects/{}'.format(project_name)
  ml = discovery.build('ml', 'v1')
  request = ml.projects().jobs().create(
      parent=project_id, body=batch_predict_body)

  try:
    response = request.execute()
  except errors.HttpError as err:
    # Best effort: report the failure and hand back no job id.
    logging.error('There was an error creating the prediction job.'
                  ' Check the details: {}'.format(err._get_reason()))
    return None
  logging.info('state : {}'.format(response['state']))
  return response['jobId']


def _make_batch_job_body(project_name,
                         input_paths,
                         output_path,
                         model_name,
                         region='us-central1',
                         data_format='TF_RECORD',
                         version_name=None,
                         max_worker_count=None,
                         runtime_version=None):
  """Creates the request body for Cloud MLE batch prediction job.

  Args:
    project_name: gcp project name.
    input_paths: Value of `predictionInput.inputPaths`.
    output_path: Value of `predictionInput.outputPath`.
    model_name: Name of the model to run.
    region: Value of `predictionInput.region`.
    data_format: Value of `predictionInput.dataFormat`.
    version_name: Optional model version. If falsy, the model itself (its
      default version) is referenced.
    max_worker_count: Optional cap on workers; omitted from the body if falsy.
    runtime_version: Optional runtime version; omitted from the body if falsy.

  Returns:
    A dict with keys 'jobId' and 'predictionInput'.
  """
  project_id = 'projects/{}'.format(project_name)
  model_id = '{}/models/{}'.format(project_id, model_name)

  # The job id has the format "<project>_<model_name>_YYYYMMDD_HHMMSS" (UTC).
  timestamp = time.strftime('%Y%m%d_%H%M%S', time.gmtime())

  # Make sure the project name is formatted correctly to work as the basis
  # of a valid job name.
  clean_project_name = re.sub(r'\W+', '_', project_name)
  job_id = '{}_{}_{}'.format(clean_project_name, model_name, timestamp)

  # Start building the request dictionary with required information.
  prediction_input = {
      'dataFormat': data_format,
      'inputPaths': input_paths,
      'outputPath': output_path,
      'region': region
  }

  # Use the version if present, the model (its default version) if not.
  if version_name:
    prediction_input['versionName'] = '{}/versions/{}'.format(
        model_id, version_name)
  else:
    prediction_input['modelName'] = model_id

  # Only include a maximum number of workers or a runtime version if specified.
  # Otherwise let the service use its defaults.
  if max_worker_count:
    prediction_input['maxWorkerCount'] = max_worker_count
  if runtime_version:
    prediction_input['runtimeVersion'] = runtime_version

  return {'jobId': job_id, 'predictionInput': prediction_input}


def check_job_over(project_name, job_name):
  """Sleeps until the batch job is over.

  Polls the job every 30 seconds and logs the elapsed time every fifth poll.

  Args:
    project_name: gcp project name.
    job_name: Id of the job, as returned by `call_model_predictions_from_df`.

  Raises:
    ValueError: if the job ends in state FAILED or CANCELLED. Without this
      check the loop would never terminate for such jobs.
  """
  ml = discovery.build('ml', 'v1')
  request = ml.projects().jobs().get(
      name='projects/{}/jobs/{}'.format(project_name, job_name))

  start_time = datetime.datetime.now()
  k = 0
  while True:
    response = request.execute()
    state = response['state']
    if state == 'SUCCEEDED':
      break
    if state in ('FAILED', 'CANCELLED'):
      # NOTE(review): terminal state names and the `errorMessage` field are
      # taken from the service's job resource; confirm against the API
      # reference.
      raise ValueError('Prediction job {} ended in state {}: {}'.format(
          job_name, state, response.get('errorMessage')))
    if not (k % 5):
      time_spent = int(
          (datetime.datetime.now() - start_time).total_seconds() / 60)
      logging.info(
          'Waiting for prediction job to complete. Minutes elapsed: {}'
          .format(time_spent))
    time.sleep(30)
    k += 1

  logging.info('Prediction job completed.')


def _add_prediction_columns(df, predictions, model_col_name, prediction_name,
                            example_key, class_names):
  """Validates parsed predictions and writes them into `df` (in place)."""
  if not predictions:
    raise ValueError(
        'The prediction file returned by CMLE is empty.'
        ' It might be due to a badly formatted tfrecord input file that can not be'
        ' parsed by CMLE (wrong input signature given by a `Model` instance).'
        ' Check the logs of your CMLE job for further details.')

  if example_key not in predictions[0]:
    raise ValueError(
        "Predictions do not contain the 'example_key' field."
        " Verify that your 'example_key' parameter (set to {})"
        " matches the CMLE model signature.".format(example_key))

  if prediction_name not in predictions[0]:
    raise ValueError(
        "Predictions do not contain the 'prediction_name' field."
        " Verify that your 'prediction_name' parameter (set to {})"
        " matches the CMLE model signature.".format(prediction_name))

  if len(predictions) != len(df):
    raise ValueError('The dataframe and the prediction file do not contain'
                     ' the same number of lines.')

  # Values are assigned by position after sorting on `example_key`, so the
  # rows of `df` are assumed to be in ascending `example_key` order already
  # (TODO confirm with callers).
  predictions = sorted(predictions, key=lambda x: x[example_key])

  if class_names is None:
    df[model_col_name] = [x[prediction_name][0] for x in predictions]
  else:
    for i, class_name in enumerate(class_names):
      df['{}_{}'.format(model_col_name, class_name)] = [
          x[prediction_name][i] for x in predictions
      ]
  return df


def add_model_predictions_to_df(df,
                                prediction_file,
                                model_col_name,
                                prediction_name,
                                example_key,
                                class_names=None):
  """Loads the prediction files and adds the model scores to a DataFrame.

  This function reads `<prediction_file>/prediction.results-00000-of-00001`
  and extracts the fields `prediction_name` and `example_key`. It orders the
  results based on `example_key` and then adds them to `df`.

  Args:
    df: a pandas `DataFrame`. It is modified in place.
    prediction_file: Directory holding the prediction files (outputs of CMLE
      prediction job). Only the single shard named above is read.
    model_col_name: Column name of the prediction values in df (added column).
    prediction_name: Name of the column to retrieve from CMLE predictions.
    example_key: key identifier of an example.
    class_names: If the model is a multiclass model, you can specify class
      names. One column `<model_col_name>_<class_name>` is then added per
      class, filled with the value at that class's position in the
      prediction. If None, a single column `model_col_name` holds the first
      value of each prediction.

  Returns:
    df: the same pandas `DataFrame` with the added column(s).

  Raises:
    ValueError: prediction file does not exist.
    ValueError: prediction file is empty or is not valid JSON lines.
    ValueError: predictions lack the `example_key` or `prediction_name` field.
    ValueError: dataframe and prediction file do not have the same number of
      lines.
  """
  prediction_file = os.path.join(prediction_file,
                                 'prediction.results-00000-of-00001')
  if not tf.gfile.Exists(prediction_file):
    raise ValueError(
        'Prediction file does not exist.'
        ' You need to call prediction job and wait for completion.')

  def _load_predictions(pred_file):
    with file_io.FileIO(pred_file, 'r') as f:
      # prediction file needs to fit in memory.
      try:
        return [json.loads(line) for line in f]
      except ValueError:
        # A line that is not valid JSON invalidates the whole file; it is
        # then reported as an empty prediction file.
        return []

  predictions = _load_predictions(prediction_file)
  return _add_prediction_columns(df, predictions, model_col_name,
                                 prediction_name, example_key, class_names)
class AddModelPredictionsToDf(unittest.TestCase):
  """Tests for `add_model_predictions_to_df`.

  NOTE(review): the fixture paths are `gs://kaggle-model-experiments/...`
  locations, so these tests presumably need read access to that bucket.
  """

  def setUp(self):
    self.COMMENT_KEY = 'comment_key'
    self._df = pd.DataFrame({
        self.COMMENT_KEY: [0, 1],
        'other_field_1': ['I am a man', 'I am a woman'],
    })
    self._prediction_file = 'gs://kaggle-model-experiments/files_for_unittest/model1:v1'
    self._model_col_name = 'model1:v1_preds'
    self._prediction_name = 'toxicity/logistic'
    self._example_key = self.COMMENT_KEY

  def _add_predictions(self, **overrides):
    """Calls the function under test with the fixture values.

    `class_names` is passed explicitly: it is a parameter of the function, and
    omitting it would make a call fail on its arity instead of exercising the
    behaviour under test.
    """
    kwargs = {
        'df': self._df,
        'prediction_file': self._prediction_file,
        'model_col_name': self._model_col_name,
        'prediction_name': self._prediction_name,
        'example_key': self._example_key,
        'class_names': None,
    }
    kwargs.update(overrides)
    return utils_cloudml.add_model_predictions_to_df(**kwargs)

  def test_missing_prediction_file(self):
    path = 'not_existing_folder/not_existing_file_path'
    with self.assertRaises(ValueError) as context:
      self._add_predictions(prediction_file=path)
    self.assertIn(
        'Prediction file does not exist.',
        str(context.exception))

  def test_empty_prediction_file(self):
    path = 'gs://kaggle-model-experiments/files_for_unittest/for_empty_predictions'
    with self.assertRaises(ValueError) as context:
      self._add_predictions(prediction_file=path)
    self.assertIn(
        'The prediction file returned by CMLE is empty.',
        str(context.exception))

  def test_missing_example_key(self):
    with self.assertRaises(ValueError) as context:
      self._add_predictions(example_key='not_found_example_key')
    self.assertIn(
        "Predictions do not contain the 'example_key' field.",
        str(context.exception))

  def test_missing_prediction_key(self):
    with self.assertRaises(ValueError) as context:
      self._add_predictions(prediction_name='not_found_prediction_key')
    self.assertIn(
        "Predictions do not contain the 'prediction_name' field.",
        str(context.exception))

  def test_correct(self):
    output_df = self._add_predictions()
    right_output = pd.DataFrame({
        self.COMMENT_KEY: [0, 1],
        'other_field_1': ['I am a man', 'I am a woman'],
        self._model_col_name: [0.38753455877304077, 0.045782867819070816]
    })
    # Column order is irrelevant to the contract, so compare sorted columns.
    pd.testing.assert_frame_equal(
        output_df.sort_index(axis=1),
        right_output.sort_index(axis=1))


if __name__ == '__main__':
  unittest.main()
def _bytes_feature(value):
  """Wraps one string (or bytes) value in a bytes_list `tf.train.Feature`."""
  # Convert to bytes, as `_bytes_list_feature` does for each of its values.
  return tf.train.Feature(
      bytes_list=tf.train.BytesList(value=[tf.compat.as_bytes(value)]))


def _int64_feature(value):
  """Wraps one integer value in an int64_list `tf.train.Feature`."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_list_feature(value_list):
  """Wraps a list of strings (or bytes) in a bytes_list `tf.train.Feature`."""
  return tf.train.Feature(
      bytes_list=tf.train.BytesList(
          value=[tf.compat.as_bytes(value) for value in value_list]))


class EncodingFeatureSpec(object):
  """Names of the supported field types and the encoder used for each."""

  INTEGER = 'integer'
  STRING = 'string'
  LIST_STRING = 'list_string'

  # Maps a type name to the function building the matching `tf.train.Feature`.
  CONSTRUCTOR_PER_TYPE = {
      INTEGER: _int64_feature,
      STRING: _bytes_feature,
      LIST_STRING: _bytes_list_feature
  }


def is_valid_spec(spec):
  """Verifies that the spec matches requirements.

  Args:
    spec: Dict of {field name (str): type name}, where the type name is one of
      the keys of `EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE`.

  Returns:
    True if the spec is valid (an invalid spec raises instead).

  Raises:
    ValueError: if `spec` is not a dict, if a key is not a string or if a
      value is not a supported type name.
  """
  if not isinstance(spec, dict):
    raise ValueError('Spec should be a dictionary instance.')
  for (key, item) in spec.items():
    if not isinstance(key, str):
      raise ValueError(
          'Spec is badly defined. Keys should be string (field names).')
    if item not in EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE:
      # A sorted list reads better in the message than a `dict_keys` view.
      raise ValueError(
          'Spec is badly defined. Authorized types are one of {}.'.format(
              sorted(EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE)))
  return True


def encode_pandas_to_tfrecords(df,
                               feature_keys_spec,
                               tf_records_path,
                               example_key=None):
  """Write a pandas `DataFrame` to a tf_record.

  Args:
    df: pandas `DataFrame`. It must include the fields that are part of
      feature_keys_spec.
    feature_keys_spec: Dict of {name: type}, which describes the spec of the
      TF-records.
    tf_records_path: where to write the tf records.
    example_key: key identifier of an example (string).
      This key will be added to data automatically (its value is the row
      position) and should not be part of df. If none, no example_key will be
      created.

  Raises:
    ValueError if feature_keys_spec does not follow a FeatureSpec format.

  Note: TFRecords will have fields feature_keys_spec and
    `example_key`(optional).
  """
  is_valid_spec(feature_keys_spec)

  # Resolve the encoder and the column of every feature once, not per row.
  encoders = {
      feature: EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE[type_name]
      for feature, type_name in feature_keys_spec.items()
  }
  columns = {feature: df[feature] for feature in feature_keys_spec}
  n_rows = len(df)

  writer = tf.python_io.TFRecordWriter(tf_records_path)
  try:
    for i in range(n_rows):
      if not i % 10000:
        logging.info('Preparing train data: {}/{}'.format(i, n_rows))

      # Create a feature
      feature_dict = {
          feature: encoders[feature](columns[feature].iloc[i])
          for feature in feature_keys_spec
      }
      if example_key:
        feature_dict[example_key] = _int64_feature(i)
      example = tf.train.Example(
          features=tf.train.Features(feature=feature_dict))

      # Serialize to string and write on the file
      writer.write(example.SerializeToString())
  finally:
    # Close the file even if encoding a row fails.
    writer.close()


def decode_tf_records_to_pandas(decoding_features_spec,
                                tf_records_path,
                                max_n_examples=None,
                                random_filter_keep_rate=1.0,
                                filter_fn=None):
  """Loads tf-records into a pandas dataframe.

  Args:
    decoding_features_spec: A dict mapping feature keys to FixedLenFeature
      values. Spec of the tf-records.
    tf_records_path: path to the file
    max_n_examples: Maximum number of examples to extract. If falsy, there is
      no limit.
    random_filter_keep_rate: Probability for each line to be kept in training
      data. For each line, we generate a random number x and keep it if
      x < random_filter_keep_rate.
    filter_fn (optional): Function applied to an example. If it returns False,
      the example will be discarded.

  Returns:
    A pandas `DataFrame` with one row per kept example. It is empty if no
    example could be read.
  """
  if not max_n_examples:
    max_n_examples = float('inf')

  reader = tf.TFRecordReader()
  filenames = tf.train.match_filenames_once(tf_records_path)
  filename_queue = tf.train.string_input_producer(filenames, num_epochs=1)
  _, serialized_example = reader.read(filename_queue)
  read_data = tf.parse_single_example(
      serialized=serialized_example, features=decoding_features_spec)

  sess = tf.InteractiveSession()
  # The coordinator lets the queue-runner threads be stopped once reading is
  # over, so that they and the session do not outlive this call.
  coord = tf.train.Coordinator()
  threads = []
  rows = []
  try:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())
    threads = tf.train.start_queue_runners(sess, coord=coord)

    while True:
      # Every read, including the first one, may hit the end of the input.
      try:
        new_line = sess.run(read_data)
      except tf.errors.OutOfRangeError:
        logging.info('End of file.')
        break
      if not new_line:
        break

      keep_line = filter_fn(new_line) if filter_fn else True
      keep_line = keep_line and (random.random() < random_filter_keep_rate)
      if keep_line:
        rows.append(new_line)
        if len(rows) >= max_n_examples:
          break
        if not (len(rows) % 100000):
          logging.info('Loaded {} lines.'.format(len(rows)))
  finally:
    coord.request_stop()
    coord.join(threads)
    sess.close()

  return pd.DataFrame(rows)
class TestEncodingAndDecoding(unittest.TestCase):
  """Test to encode and decode a pandas DataFrame"""

  def testCorrect(self):
    # Imported here because only this test needs them.
    import os
    import shutil
    import tempfile

    input_df = pd.DataFrame({
        'x': [1, 2, 3],
        'y': ['a', 'b', 'c'],
        'z': [['a', 'b'], ['c', 'd'], ['e', 'f']]
    })
    encoding_feature_spec = {
        'x': utils_tfrecords.EncodingFeatureSpec.INTEGER,
        'y': utils_tfrecords.EncodingFeatureSpec.STRING,
        'z': utils_tfrecords.EncodingFeatureSpec.LIST_STRING
    }
    decoding_spec = {
        'x': tf.FixedLenFeature([], dtype=tf.int64),
        'y': tf.FixedLenFeature([], dtype=tf.string),
        'z': tf.FixedLenFeature([2], dtype=tf.string),
    }

    # Write to a throw-away directory so the working directory stays clean.
    tmp_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
    tf_records_path = os.path.join(tmp_dir, 'unittest.tf_records')

    utils_tfrecords.encode_pandas_to_tfrecords(input_df, encoding_feature_spec,
                                               tf_records_path)
    output_df = utils_tfrecords.decode_tf_records_to_pandas(
        decoding_spec, tf_records_path)
    try:
      pd.testing.assert_frame_equal(input_df, output_df)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')


class TestFeatureKeySpec(unittest.TestCase):
  """Verifies the format of Feature Spec"""

  def test_not_a_dictionary(self):
    # A plain string: a trailing comma here would turn it into a tuple.
    feature_keys_spec = 'not_a_dict'
    with self.assertRaises(ValueError) as context:
      utils_tfrecords.is_valid_spec(feature_keys_spec)
    self.assertIn('Spec should be a dictionary instance.',
                  str(context.exception))

  def test_not_in_possible(self):
    feature_keys_spec = {'key': 'other_possibility'}
    with self.assertRaises(ValueError) as context:
      utils_tfrecords.is_valid_spec(feature_keys_spec)
    self.assertIn('Spec is badly defined. Authorized types are one of',
                  str(context.exception))

  def test_valid(self):
    try:
      feature_keys_spec = {
          'comment_text': utils_tfrecords.EncodingFeatureSpec.LIST_STRING
      }
      utils_tfrecords.is_valid_spec(feature_keys_spec)
    except ValueError:
      self.fail('Dataset raised an exception unexpectedly!')


if __name__ == '__main__':
  unittest.main()