[
  {
    "path": ".bazelrc",
    "content": "startup --host_jvm_args=-Xmx2500m\nstartup --host_jvm_args=-Xms2500m\nstartup --batch\ntest --ram_utilization_factor=10\n\nbuild --verbose_failures\nbuild --spawn_strategy=standalone --genrule_strategy=standalone\ntest --test_strategy=standalone\n"
  },
  {
    "path": ".gitignore",
    "content": "# Editor config.\n.vscode/\n\n# Python compiled files.\n*.pyc\n\n# Virtual Environment files.\n.pyenv\n.virtualenv\nenv\n.venv\n\n# mypy cache files for type-checking.\n.mypy_cache\n\n# Bazel\nbazel-bin\nbazel-experiments\nbazel-genfiles\nbazel-out\nbazel-testlogs\n"
  },
  {
    "path": ".travis.yml",
    "content": "language: python\n\npython:\n  - \"3.5\"\n  - \"3.6\"\n\ndist: trusty\n\naddons:\n  apt:\n    sources:\n      - ubuntu-toolchain-r-test\n    packages:\n      - wget\n      - pkg-config\n\nbefore_install:\n  - wget https://github.com/bazelbuild/bazel/releases/download/0.18.1/bazel_0.18.1-linux-x86_64.deb\n  - sha256sum -c travis_blase_test_support/bazel_0.18.1-linux-x86_64.deb.sha256\n  - sudo dpkg -i bazel_0.18.1-linux-x86_64.deb\n  - cd experiments\n\ninstall:\n  - pip install -r requirements.txt\n\nscript:\n  - bazel test --test_output=streamed ...\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# How to contribute\n\nWe'd love to accept your patches and contributions to this project. There are\njust a few small guidelines you need to follow.\n\n## Contributor License Agreement\n\nContributions to this project must be accompanied by a Contributor License\nAgreement. You (or your employer) retain the copyright to your contribution,\nthis simply gives us permission to use and redistribute your contributions as\npart of the project. Head over to <https://cla.developers.google.com/> to see\nyour current agreements on file or to sign a new one.\n\nYou generally only need to submit a CLA once, so if you've already submitted one\n(even if it was for a different project), you probably don't need to do it\nagain.\n\n## Code reviews\n\nAll submissions, including submissions by project members, require review. We\nuse GitHub pull requests for this purpose. Consult [GitHub Help] for more\ninformation on using pull requests.\n\n[GitHub Help]: https://help.github.com/articles/about-pull-requests/\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"{}\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright {yyyy} {name of copyright owner}\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "# ConversationAI Models\n\nThis repository contains example code to train machine learning models for text classification as part of the [Conversation AI](https://conversationai.github.io/) project.\n\n# Outline of the codebase\n\n* `experiments/` contains the ML training framework.\n* `annotator_models/` contains a Dawid-Skene implementation for modelling rater quality to produce better annotations.\n* `attention-tutorial/` contains an introductory ipython notebook for RNNs with attention, as presented in the Devoxx talk [\"Tensorflow, deep learning and modern RNN architectures, without a PhD by Martin Gorner\"](https://www.youtube.com/watch?v=pzOzmxCR37I)\n* `kaggle-classification/` contains early experiments with Keras and Estimator for training on [the Jigsaw Toxicity Kaggle competition](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge). Will be superseded by `experiments/` shortly.\n* `model_evaluation/` contains utilities to use a model deployed on cloud MLE, and some notebooks to illustrate typical evaluation metrics.\n\n## About this code\n\nThis repository contains example code to help experiment with models to improve conversations; it is not an official Google product.\n"
  },
  {
    "path": "annotator_models/README.md",
    "content": "# Modeling Annotators\n\nThis is an implementation of the [Dawid-Skene model](http://crowdsourcing-class.org/readings/downloads/ml/EM.pdf). Dawid-Skene is an unsupervised model that can be used to improve the quality of a crowdsourced dataset by learning annotator error rates and predicting the true item labels.\n\nThis code was adapted from an [implementation](https://github.com/dallascard/dawid_skene) by [dallascard](https://github.com/dallascard).\n\n## To Run Locally\n\n1.  Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for\n    the project (recommended, but technically optional).\n\n    Python 2:\n\n    ```\n    python -m virtualenv env\n    ```\n\n    Python 3:\n\n    ```\n    python3 -m venv env\n    ```\n\n    In either case, activate your virtual env:\n\n    ```shell\n    source env/bin/activate\n    ```\n\n2.  Install library dependencies:\n\n    ```shell\n    pip install -r requirements.txt\n    ```\n\n3.  Create training data. The training data must be a CSV that has fields for\n    the worker ID, item ID and label. You can specify the column names for these\n    fields as flags to the training script.\n\n    For example:\n    ```\n    comment_id,worker_id,toxic\n    1519346288,43675129,0\n    1519346288,41122119,0\n    1519346288,38510102,0\n    1519346288,43650017,0\n    1519346288,28524232,0\n    ...\n    ```\n\n4.  Run a model on a given class (e.g. 'toxic' or 'obscene'). There are examples\n    of how to run the model locally and using ml-engine in [`bin/run_local`](bin/run_local) and\n    [`bin/run`](bin/run) respectively.\n\n    Note: to run in Google Cloud, you will need to be authenticated with\n    Google Cloud (you can run `gcloud auth application-default login` to do\n    this) and you must have access to the cloud bucket where the data is located\n    (you can test this by running `gcloud storage ls  gs://kaggle-model-experiments/`).\n\n5. The output is two files written to the `job-dir` directory specified in the run\n    script.\n   * `error_rates_{LABEL}_{N_ANNOTATIONS}.csv` - the error rates for each annotator\n   * `predictions_{LABEL}_{N_ANNOTATIONS}.csv` - the predicted labels for each item"
  },
  {
    "path": "annotator_models/bin/cancel-job",
    "content": "#!/bin/bash\n\ngcloud ml-engine jobs cancel $1\n"
  },
  {
    "path": "annotator_models/bin/ls-jobs",
    "content": "#!/bin/bash\n\ngcloud ml-engine jobs list | grep $USER\n"
  },
  {
    "path": "annotator_models/bin/run",
    "content": "#!/bin/bash\n\n#\n# A script to train the kaggle model remotely using ml-engine.\n#\n# To run with default hyperparameters from the kaggle-classification directory just enter:\n# './bin/run'\n#\n#\n#\n# Setup Steps:\n# 1. Install the gcloud SDK\n# 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]`\n# 3. Put the train and test data in Cloud Storage, `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/`\n#\n\n# Edit these!\nBUCKET_NAME=annotator_models\nCONFIG=cpu_config.yaml\nMAX_ITER=50\nTOLERANCE=1\nPSEUDO_COUNT=1\n\ndeclare -a LABELS=(\"obscene\" \"sexual_explicit\" \"threat\" \"flirtation\" \"identity_hate\" \"insult\")\n\n# Note: this must be compatible with cells that have GPUs. us-central1 works.\n# See: https://cloud.google.com/ml-engine/docs/using-gpus\nREGION=us-central1\n\nwhile getopts :c:h opt; do\ncase ${opt} in\nh)\n    echo \"Usage: run [-c config_filename.yaml]\"\n    echo \"Flags: \"\n    echo -e \" -c Specify a config file (e.g. use hparam_config to enable hyperparameter tuning)\"\n    exit 0;;\nc)\n    echo \"Using custom config ${OPTARG}\"\n    CONFIG=${OPTARG};;\n:)\n    echo \"Error: ${OPTARG} requires an argument.\"\n    echo \"Use 'run -h' for help.\"\n    exit 1;;\n\\?)\n    echo \"Invalid flag. 
Use 'run -h' for help.\"\n    exit 1;;\nesac\ndone\n\n\necho \"Writing to $OUTPUT_PATH\"\n\n\nfor label in \"${LABELS[@]}\"\ndo\n  echo \"Running on $label\"\n  DATA_PATH=gs://annotator_models/kaggle_annotation_data/dawid_skene_annotations_on_kaggle_combined_${label}.csv\n  COMMENT_TEXT_PATH=$DATA_PATH\n\n  JOB_NAME=${USER}_dawid_skene_kaggle_${label}\n  DATE=`date '+%Y%m%d_%H%M%S'`\n  DATE_DAY_ONLY=`date '+%Y%m%d'`\n  OUTPUT_PATH=gs://${BUCKET_NAME}/models/${USER}/${DATE_DAY_ONLY}\n\n  gcloud ml-engine jobs submit training ${JOB_NAME}_${DATE} \\\n         --job-dir=${OUTPUT_PATH} \\\n         --runtime-version=1.4 \\\n         --config=${CONFIG} \\\n         --module-name=trainer.dawid_skene \\\n         --package-path=trainer \\\n         --region=$REGION \\\n         --verbosity=debug -- \\\n         --data-path=$DATA_PATH \\\n         --comment-text-path=$COMMENT_TEXT_PATH \\\n         --label=$label \\\n         --max-iter=$MAX_ITER \\\n         --tolerance=$TOLERANCE \\\n         --worker-id-col='annotator_id' \\\n         --unit-id-col='comment_id' \\\n\t --pseudo-count=$PSEUDO_COUNT\ndone\n"
  },
  {
    "path": "annotator_models/bin/run_local",
    "content": "#!/bin/bash\n\n# A script to train the kaggle model locally.\n\nDATE=`date '+%Y%m%d_%H%M%S'`\nBUCKET_NAME=annotator_models\n\ndeclare -a arr=(\"obscene\" \"threat\" \"insult\" \"identity_hate\" \"toxic\" \"severe_toxic\")\n\n\n## now loop through the above array\nfor label in \"${arr[@]}\"\ndo\n  data_path=gs://${BUCKET_NAME}/kaggle_annotation_data/dawid_skene_annotations_on_kaggle_test_${label}.csv \\\n\n  gcloud ml-engine local train \\\n     --module-name=trainer.dawid_skene \\\n     --package-path=trainer -- \\\n     --data-path=${data_path} \\\n     --comment-text-path=${data_path} \\\n     --label=${label} \\\n     --job-dir='results' \\\n     --worker-id-col='annotator_id' \\\n     --unit-id-col='comment_id' \\\n     --tolerance=50 \\\n     --n_examples=1000\ndone\n"
  },
  {
    "path": "annotator_models/bin/stream-logs",
    "content": "#!/bin/bash\n\ngcloud ml-engine jobs stream-logs $1\n"
  },
  {
    "path": "annotator_models/cpu_config.yaml",
    "content": "trainingInput:\n  scaleTier: CUSTOM\n  ## Custom scaleTier needed for using > 1 GPU machines.\n  # scaleTier: CUSTOM\n  masterType: large_model\n  # workerType: complex_model_m_gpu\n  # parameterServerType: large_model\n  # workerCount: 9\n  # parameterServerCount: 3\n"
  },
  {
    "path": "annotator_models/requirements.txt",
    "content": "absl-py==0.1.12\nastor==0.6.2\nbackports.weakref==1.0.post1\nbleach==3.3.0\ncachetools==2.0.1\ncertifi==2024.7.4\nchardet==3.0.4\ndill==0.2.7.1\nenum34==1.1.6\nfuncsigs==1.0.2\nfuture==0.18.3\nfutures==3.2.0\ngapic-google-cloud-datastore-v1==0.15.3\ngapic-google-cloud-error-reporting-v1beta1==0.15.3\ngapic-google-cloud-logging-v2==0.91.3\ngast==0.2.0\ngoogle-api-core==1.1.0\ngoogle-auth==1.4.1\ngoogle-auth-oauthlib==0.2.0\ngoogle-cloud==0.32.0\ngoogle-cloud-bigquery==0.31.0\ngoogle-cloud-bigquery-datatransfer==0.1.1\ngoogle-cloud-bigtable==0.28.1\ngoogle-cloud-container==0.1.1\ngoogle-cloud-core==0.28.1\ngoogle-cloud-datastore==1.4.0\ngoogle-cloud-dns==0.28.0\ngoogle-cloud-error-reporting==0.28.0\ngoogle-cloud-firestore==0.28.0\ngoogle-cloud-language==1.0.1\ngoogle-cloud-logging==1.4.0\ngoogle-cloud-monitoring==0.28.1\ngoogle-cloud-pubsub==0.30.1\ngoogle-cloud-resource-manager==0.28.1\ngoogle-cloud-runtimeconfig==0.28.1\ngoogle-cloud-spanner==0.29.0\ngoogle-cloud-speech==0.30.0\ngoogle-cloud-storage==1.6.0\ngoogle-cloud-trace==0.17.0\ngoogle-cloud-translate==1.3.1\ngoogle-cloud-videointelligence==1.0.1\ngoogle-cloud-vision==0.29.0\ngoogle-gax==0.15.16\ngoogle-resumable-media==0.3.1\ngoogleapis-common-protos==1.5.3\ngrpc-google-iam-v1==0.11.4\ngrpcio==1.53.2\nhtml5lib==0.999999999\nhttplib2==0.19.0\nidna==3.7\nMarkdown==2.6.11\nmock==2.0.0\nnumpy==1.22.0\noauth2client==3.0.0\noauthlib==2.0.7\npandas==0.22.0\npandas-gbq==0.3.1\npbr==4.0.0\nply==3.8\nproto-google-cloud-datastore-v1==0.90.4\nproto-google-cloud-error-reporting-v1beta1==0.15.3\nproto-google-cloud-logging-v2==0.91.3\nprotobuf==3.18.3\npsutil==5.6.6\npyasn1==0.4.2\npyasn1-modules==0.2.1\npython-dateutil==2.7.2\npytz==2018.3\nrequests==2.32.0\nrequests-oauthlib==0.8.0\nrsa==4.7\nsix==1.11.0\ntensorboard==1.12.0\ntensorflow==2.12.1\ntermcolor==1.1.0\nurllib3==1.26.18\nWerkzeug==3.0.3\n"
  },
  {
    "path": "annotator_models/results/.gitignore",
    "content": "*\n!.gitignore\n"
  },
  {
    "path": "annotator_models/trainer/__init__.py",
    "content": ""
  },
  {
    "path": "annotator_models/trainer/dawid_skene.py",
    "content": "\"\"\"Description: Given unreliable ratings of items classes by multiple raters, determine the most likely true class for each item, class marginals, and  individual error rates for each rater, using Expectation Maximization\n\nReferences:\n( Dawid and Skene (1979). Maximum Likelihood Estimation of Observer\nError-Rates Using the EM Algorithm. Journal of the Royal Statistical Society.\nSeries C (Applied Statistics), Vol. 28, No. 1, pp. 20-28.\n\"\"\"\n\nimport argparse\nimport logging\nimport math\nimport sys\nimport time\n\nimport numpy as np\nimport pandas as pd\nfrom scipy import stats\nimport tensorflow as tf\n\n\nFLAGS = None\nnp.set_printoptions(precision=2)\n\n\ndef run(items,\n        raters,\n        classes,\n        counts,\n        label,\n        psuedo_count,\n        tol=1,\n        max_iter=25,\n        init='average'):\n  \"\"\"\n    Run the Dawid-Skene estimator on response data\n\n    Input:\n      responses: a pandas DataFrame of ratings where each row is a rating from\n                 some rater ('_worker_id') on some item ('_unit_id')\n      tol: tolerance required for convergence of EM\n      max_iter: maximum number of iterations of EM\n    \"\"\"\n\n  # initialize\n  iteration = 0\n  converged = False\n  old_class_marginals = None\n  old_error_rates = None\n\n  # item_classes is a matrix of estimates of true item classes of size\n  # [items, classes]\n  item_classes = initialize(counts)\n  [nItems, nRaters, nClasses] = np.shape(counts)\n\n  logging.info('Iter\\tlog-likelihood\\tdelta-CM\\tdelta-Y_hat')\n\n  while not converged:\n    iteration += 1\n    start_iter = time.time()\n\n    # M-step - updated error rates and class marginals given new\n    #          distribution over true item classes\n    old_item_classes = item_classes\n\n    (class_marginals, error_rates) = m_step(counts, item_classes, psuedo_count)\n\n    # E-step - calculate expected item classes given error rates and\n    #          class marginals\n    
item_classes = e_step_verbose(counts, class_marginals, error_rates)\n\n    # check likelihood\n    log_L = calc_likelihood(counts, class_marginals, error_rates)\n\n    # calculate the number of seconds the last iteration took\n    iter_time = time.time() - start_iter\n\n    # check for convergence\n    if old_class_marginals is not None:\n      class_marginals_diff = np.sum(\n          np.abs(class_marginals - old_class_marginals))\n      item_class_diff = np.sum(np.abs(item_classes - old_item_classes))\n\n      logging.info('{0}\\t{1:.1f}\\t{2:.4f}\\t\\t{3:.2f}\\t({4:3.2f} secs)'.format(\n          iteration, log_L, class_marginals_diff, item_class_diff, iter_time))\n\n      if (class_marginals_diff < tol and item_class_diff < tol) \\\n         or iteration > max_iter:\n        converged = True\n    else:\n      logging.info('{0}\\t{1:.1f}'.format(iteration, log_L))\n\n    # update current values\n    old_class_marginals = class_marginals\n    old_error_rates = error_rates\n\n  return class_marginals, error_rates, item_classes\n\n\ndef load_data(path, unit_id, worker_id, label):\n  logging.info('Loading data from {0}'.format(path))\n\n  with tf.gfile.Open(path, 'rb') as fileobj:\n    df = pd.read_csv(fileobj, encoding='utf-8')\n\n  # only keep necessary columns\n  df = df[[unit_id, worker_id, label]]\n  return df\n\n\ndef initialize(counts):\n  \"\"\"\n    Get initial estimates for the true item classes using counts\n    see equation 3.1 in Dawid-Skene (1979)\n\n    Input:\n      counts: counts of the number of times each response was given\n          by each rater for each item: [items x raters x classes]. 
Note\n          in the crowd rating example, counts will be a 0/1 matrix.\n\n    Returns:\n      item_classes: matrix of estimates of true item classes:\n          [items x responses]\n    \"\"\"\n  [nItems, nRaters, nClasses] = np.shape(counts)\n\n  # sum over raters\n  response_sums = np.sum(counts, 1)\n\n  # create an empty array\n  item_classes = np.zeros([nItems, nClasses])\n\n  # for each item, take the average number of ratings in each class\n  for p in range(nItems):\n    item_classes[p, :] = response_sums[p, :] / np.sum(\n        response_sums[p, :], dtype=float)\n\n  return item_classes\n\n\ndef m_step(counts, item_classes, psuedo_count):\n  \"\"\"\n    Get estimates for the prior class probabilities (p_j) and the error\n    rates (pi_jkl) using MLE with current estimates of true item classes\n    See equations 2.3 and 2.4 in Dawid-Skene (1979)\n\n    Input:\n      counts: Array of how many times each rating was given by each rater\n        for each item\n      item_classes: Matrix of current assignments of items to classes\n      psuedo_count: A psuedo count used to smooth the error rates. 
For each\n      rater k\n        and for each class i and class j, we pretend rater k has rated\n        psuedo_count examples with class i when class j was the true class.\n\n    Returns:\n      p_j: class marginals [classes]\n      pi_kjl: error rates - the probability of rater k giving\n          response l for an item in class j [observers, classes, classes]\n    \"\"\"\n  [nItems, nRaters, nClasses] = np.shape(counts)\n\n  # compute class marginals\n  class_marginals = np.sum(item_classes, axis=0) / float(nItems)\n\n  # compute error rates for each rater, each predicted class\n  # and each true class\n\n  error_rates = np.matmul(counts.T, item_classes) + psuedo_count\n\n  # reorder axes so its of size [nItems x nClasses x nClasses]\n  error_rates = np.einsum('abc->bca', error_rates)\n\n  # divide each row by the sum of the error rates over all observation classes\n  sum_over_responses = np.sum(error_rates, axis=2)[:, :, None]\n\n  # for cases where an annotator has never used a label, set their sum over\n  # responses for that label to 1 to avoid nan when we divide. The result will\n  # be error_rate[k, i, j] is 0 if annotator k never used label i.\n  sum_over_responses[sum_over_responses == 0] = 1\n\n  error_rates = np.divide(error_rates, sum_over_responses)\n\n  return (class_marginals, error_rates)\n\n\ndef m_step_verbose(counts, item_classes, psuedo_count):\n  \"\"\"\n    This method is the verbose (i.e. 
not vectorized) version of the m_step.\n    It is currently not used because the vectorized version is faster, but we\n    leave it here for future debugging.\n\n    Get estimates for the prior class probabilities (p_j) and the error\n    rates (pi_jkl) using MLE with current estimates of true item classes\n    See equations 2.3 and 2.4 in Dawid-Skene (1979)\n\n    Input:\n      counts: Array of how many times each rating was given by each rater\n        for each item\n      item_classes: Matrix of current assignments of items to classes\n      psuedo_count: A psuedo count used to smooth the error rates. For each\n      rater k\n        and for each class i and class j, we pretend rater k has rated\n        psuedo_count examples with class i when class j was the true class.\n\n    Returns:\n      p_j: class marginals [classes]\n      pi_kjl: error rates - the probability of rater k giving\n          response l for an item in class j [observers, classes, classes]\n    \"\"\"\n  [nItems, nRaters, nClasses] = np.shape(counts)\n\n  # compute class marginals\n  class_marginals = np.sum(item_classes, 0) / float(nItems)\n\n  # compute error rates for each rater, each predicted class\n  # and each true class\n  error_rates = np.zeros([nRaters, nClasses, nClasses])\n  for k in range(nRaters):\n    for j in range(nClasses):\n      for l in range(nClasses):\n        error_rates[k, j, l] = np.dot(item_classes[:,j], counts[:,k,l]) \\\n                               + psuedo_count\n\n      # normalize by summing over all observation classes\n      sum_over_responses = np.sum(error_rates[k, j, :])\n\n      if sum_over_responses > 0:\n        error_rates[k, j, :] = error_rates[k, j, :] / float(sum_over_responses)\n\n  return (class_marginals, error_rates)\n\n\ndef e_step(counts_tiled, class_marginals, error_rates):\n  \"\"\"\n    Determine the probability of each item belonging to each class,\n    given current ML estimates of the parameters from the M-step\n    See equation 2.5 
in Dawid-Skene (1979)\n\n    Inputs:\n      counts_tiled: A matrix of how many times each rating was given\n          by each rater for each item, repeated for each class to make matrix\n          multiplication fasterr. Size: [nItems, nRaters, nClasses, nClasses]\n      class_marginals: probability of a random item belonging to each class.\n          Size: [nClasses]\n      error_rates: probability of rater k assigning a item in class j\n          to class l. Size [nRaters, nClasses, nClasses]\n\n    Returns:\n      item_classes: Soft assignments of items to classes\n          [items x classes]\n    \"\"\"\n  [nItems, _, nClasses, _] = np.shape(counts_tiled)\n\n  error_rates_tiled = np.tile(error_rates, (nItems, 1, 1, 1))\n  power = np.power(error_rates_tiled, counts_tiled)\n\n  # Note, multiplying over axis 1 and then 2 is substantially faster than\n  # the equivalent np.prod(power, axis=(1,3)\n  item_classes = class_marginals * np.prod(np.prod(power, axis=1), axis=2)\n\n  # normalize error rates by dividing by the sum over all classes\n  item_sum = np.sum(item_classes, axis=1, keepdims=True)\n  item_classes = np.divide(item_classes, np.tile(item_sum, (1, nClasses)))\n\n  return item_classes\n\n\ndef e_step_verbose(counts, class_marginals, error_rates):\n  \"\"\"\n    This method is the verbose (i.e. not vectorized) version of\n    the e_step. 
It is actually faster than the vectorized e_step\n    function (16 seconds vs 25 seconds respectively on 10k ratings).\n\n    Determine the probability of each item belonging to each class,\n    given current ML estimates of the parameters from the M-step\n    See equation 2.5 in Dawid-Skene (1979)\n\n    Inputs:\n      counts: Array of how many times each rating was given\n          by each rater for each item\n      class_marginals: probability of a random item belonging to each class\n      error_rates: probability of rater k assigning a item in class j\n          to class l [raters, classes, classes]\n\n    Returns:\n      item_classes: Soft assignments of items to classes\n          [items x classes]\n    \"\"\"\n  [nItems, nRaters, nClasses] = np.shape(counts)\n\n  item_classes = np.zeros([nItems, nClasses])\n\n  for i in range(nItems):\n    for j in range(nClasses):\n      estimate = class_marginals[j]\n      estimate *= np.prod(np.power(error_rates[:, j, :], counts[i, :, :]))\n      item_classes[i, j] = estimate\n\n  # normalize error rates by dividing by the sum over all classes\n  item_sum = np.sum(item_classes, axis=1, keepdims=True)\n  item_classes = np.divide(item_classes, np.tile(item_sum, (1, nClasses)))\n\n  return item_classes\n\n\ndef calc_likelihood(counts, class_marginals, error_rates):\n  \"\"\"\n    Calculate the likelihood given the current parameter estimates\n    This should go up monotonically as EM proceeds\n    See equation 2.7 in Dawid-Skene (1979)\n\n    Inputs:\n      counts: Array of how many times each response was received\n          by each rater from each item\n      class_marginals: probability of a random item belonging to each class\n      error_rates: probability of rater k assigning a item in class j\n          to class l [raters, classes, classes]\n\n    Returns:\n      Likelihood given current parameter estimates\n    \"\"\"\n  [nItems, nRaters, nClasses] = np.shape(counts)\n  log_L = 0.0\n\n  for i in range(nItems):\n    
item_likelihood = 0.0\n    for j in range(nClasses):\n\n      class_prior = class_marginals[j]\n      item_class_likelihood = np.prod(\n          np.power(error_rates[:, j, :], counts[i, :, :]))\n      item_class_posterior = class_prior * item_class_likelihood\n      item_likelihood += item_class_posterior\n\n    temp = log_L + np.log(item_likelihood)\n\n    if np.isnan(temp) or np.isinf(temp):\n      logging.info('{0}, {1}, {2}'.format(i, log_L, np.log(item_likelihood),\n                                          temp))\n      sys.exit()\n\n    log_L = temp\n\n  return log_L\n\n\ndef random_initialization(counts):\n  \"\"\"\n    Similar to initialize() above, except choose one initial class for each\n    item, weighted in proportion to the counts.\n\n    Input:\n      counts: counts of the number of times each response was received\n          by each rater from each item: [items x raters x classes]\n\n    Returns:\n      item_classes: matrix of estimates of true item classes:\n          [items x responses]\n    \"\"\"\n  [nItems, nRaters, nClasses] = np.shape(counts)\n\n  response_sums = np.sum(counts, 1)\n\n  # create an empty array\n  item_classes = np.zeros([nItems, nClasses])\n\n  # for each item, choose a random initial class, weighted in proportion\n  # to the counts from all raters\n  for p in range(nItems):\n    weights = response_sums[p, :] / np.sum(response_sums[p, :], dtype=float)\n    item_classes[p, np.random.choice(np.arange(nClasses), p=weights)] = 1\n\n  return item_classes\n\n\ndef majority_voting(counts):\n  \"\"\"\n      An alternative way to initialize assignment of items to classes\n      i.e Get initial estimates for the true item classes using majority voting\n\n    Input:\n      counts: Counts of the number of times each response was received\n          by each rater from each item: [items x raters x classes]\n    Returns:\n      item_classes: matrix of initial estimates of true item classes:\n          [items x responses]\n    \"\"\"\n  
[nItems, nRaters, nClasses] = np.shape(counts)\n  # sum over observers\n  response_sums = np.sum(counts, 1)\n\n  # create an empty array\n  item_classes = np.zeros([nItems, nClasses])\n\n  # take the most frequent class for each item\n  for p in range(nItems):\n    indices = np.argwhere(response_sums[p, :] == np.max(response_sums[p, :]))\n    # in the case of ties, take the lowest valued label (could be randomized)\n    item_classes[p, np.min(indices)] = 1\n\n  return item_classes\n\n\ndef parse_item_classes(df, label, item_classes, index_to_unit_id_map,\n                       index_to_y_map, unit_id, worker_id, comment_text_path):\n  \"\"\"\n    Given the original data df, the predicted item_classes, and\n    the data mappings, returns a DataFrame with the fields:\n      * _unit_index: the 0,1,...nItems index\n      * _unit_id: the original item ID\n      * {LABEL}_hat: the predicted probability of the item being labeled 1 as\n               learned from the Dawid-Skene algorithm\n      * {LABEL}_mean: the mean of the original ratings\n    \"\"\"\n  LABEL_HAT = '{}_hat'.format(label)\n  LABEL_MEAN = '{}_mean'.format(label)\n  ROUND_DEC = 8\n  _, N_ClASSES = np.shape(item_classes)\n\n  df_predictions = pd.DataFrame()\n\n  # Add columns for predictions for each class\n  col_names = []\n  for k in range(N_ClASSES):\n    # y is the original value of the class. When we train, we re-map\n    # all the classes to 0,1,....K. But our data has classes like\n    # -2,-1,0,1,2. 
In that case, of k is 0, then y would be -2\n    y = index_to_y_map[k]\n    col_name = '{0}_{1}'.format(LABEL_HAT, y)\n    col_names.append(col_name)\n\n    df_predictions[col_name] = [round(i[k], ROUND_DEC) for i in item_classes]\n\n  # To get a prediction of the mean label, multiply our predictions with the\n  # true y values.\n  y_values = list(index_to_y_map.values())\n  col_name = '{0}_hat_mean'.format(label)\n  df_predictions[col_name] = np.dot(df_predictions[col_names], list(y_values))\n\n  # Use the _unit_index to map to the original _unit_id\n  df_predictions['_unit_index'] = range(len(item_classes))\n  df_predictions[unit_id] = df_predictions['_unit_index']\\\n                               .apply(lambda i: index_to_unit_id_map[i])\n\n  # Calculate the y_mean from the original data and join on _unit_id\n  # Add a column for the mean predictions\n  df[label] = df[label].astype(float)\n  mean_labels = df.groupby(unit_id, as_index=False)[label]\\\n                 .mean()\\\n                 .round(ROUND_DEC)\\\n                 .rename(index=int, columns={label: LABEL_MEAN})\n  df_predictions = pd.merge(mean_labels, df_predictions, on=unit_id)\n\n  # join with data that contains the item-level comment text\n  if comment_text_path:\n    with tf.gfile.Open(comment_text_path, 'r') as fileobj:\n      logging.info(\n          'Loading comment text data from {}'.format(comment_text_path))\n      df_comments = pd.read_csv(fileobj)\n\n      # drop duplicate comments\n      df_comments = df_comments.drop_duplicates(subset=unit_id)\n\n    df_predictions = df_predictions.merge(df_comments, on=unit_id)\n  return df_predictions\n\n\ndef parse_error_rates(df, error_rates, index_to_worker_id_map, index_to_y_map,\n                      unit_id, worker_id):\n  \"\"\"\n    Given the original data DataFrame, the predicted error_rates and the\n    mappings\n    between the indexes and ids, returns a DataFrame with the fields:\n\n      * _worker_index: the 0,1,...nItems index\n 
     * _worker_id: the original item ID\n      * _error_rate_{k}_{k}: probability the worker would choose class k when\n          the true class is k (for accurate workers, these numbers are high).\n    \"\"\"\n  columns = [worker_id, '_worker_index']\n\n  df_error_rates = pd.DataFrame()\n\n  # add the integer _worker_index\n  df_error_rates['_worker_index'] = index_to_worker_id_map.keys()\n\n  # add the original _worker_id\n  df_error_rates[worker_id] = [j for (i, j) in index_to_worker_id_map.items()]\n\n  # add annotation counts for each worker\n  worker_counts = df.groupby(\n      by=worker_id, as_index=False)[unit_id]\\\n                    .count()\\\n                    .rename(index=int, columns={unit_id: 'n_annotations'})\n\n  df_error_rates = pd.merge(df_error_rates, worker_counts, on=worker_id)\n\n  # add the diagonal error rates, which are the per-class accuracy rates,\n  # for each class k, we add a column for p(rater will pick k | item's true class is k)\n\n  # y_label is the original y value in the data and y_index is the\n  # integer we mapped it to, i.e. 
0, 1, ..., |Y|\n  for y_index, y_label in index_to_y_map.items():\n    col_name = 'accuracy_rate_{0}'.format(y_label)\n    df_error_rates[col_name] = [e[y_index, y_index] for e in error_rates]\n\n  return df_error_rates\n\n\ndef main(FLAGS):\n  logging.basicConfig(level=logging.INFO)\n\n  # load data, each row is an annotation\n  n_examples = FLAGS.n_examples\n  label = FLAGS.label\n  unit_id = FLAGS.unit_id_col\n  worker_id = FLAGS.worker_id_col\n  comment_text_path = FLAGS.comment_text_path\n  df = load_data(FLAGS.data_path, unit_id, worker_id, label)[0:n_examples]\n\n  logging.info('Running on {0} examples for label {1}'.format(len(df), label))\n\n  # convert rater, item and label IDs to integers starting at 0\n  #\n  #   * worker_id_to_index_map: _worker_id -> index\n  #   * index_to_worker_id_map: index -> worker\n  #   * unit_id_to_index_map: _unit_id -> index\n  #   * index_to_unit_id_map: index -> _unit_id\n  #   * y_to_index_map: label -> index\n  #   * index_to_y_map: index -> label\n  worker_id_to_index_map = {\n      w: i for (i, w) in enumerate(df[worker_id].unique())\n  }\n  index_to_worker_id_map = {i: w for (w, i) in worker_id_to_index_map.items()}\n  unit_id_to_index_map = {w: i for (i, w) in enumerate(df[unit_id].unique())}\n  index_to_unit_id_map = {i: w for (w, i) in unit_id_to_index_map.items()}\n  y_to_index_map = {w: i for (i, w) in enumerate(df[label].unique())}\n  index_to_y_map = {i: w for (w, i) in y_to_index_map.items()}\n\n  # create list of unique raters, items and labels\n  raters = list(df[worker_id].apply(lambda x: worker_id_to_index_map[x]))\n  items = list(df[unit_id].apply(lambda x: unit_id_to_index_map[x]))\n  y = list(df[label].apply(lambda x: y_to_index_map[x]))\n\n  nClasses = len(df[label].unique())\n  nItems = len(df[unit_id].unique())\n  nRaters = len(df[worker_id].unique())\n  counts = np.zeros([nItems, nRaters, nClasses])\n\n  # convert responses to counts\n  for i, item_index in enumerate(items):\n    rater_index = 
raters[i]\n    y_index = y[i]\n    counts[item_index, rater_index, y_index] += 1\n\n  raters_unique = index_to_worker_id_map.keys()\n  items_unique = index_to_unit_id_map.keys()\n  classes_unique = index_to_y_map.keys()\n\n  logging.info('num items: {0}'.format(len(items_unique)))\n  logging.info('num raters: {0}'.format(len(raters_unique)))\n  logging.info('num classes: {0}'.format(len(classes_unique)))\n\n  # run EM\n  start = time.time()\n  class_marginals, error_rates, item_classes = run(\n      items_unique,\n      raters_unique,\n      classes_unique,\n      counts,\n      label,\n      FLAGS.pseudo_count,\n      tol=FLAGS.tolerance,\n      max_iter=FLAGS.max_iter)\n  end = time.time()\n  logging.info('training time: {0:.4f} seconds'.format(end - start))\n\n  # join comment_text, old labels and new labels\n  df_predictions = parse_item_classes(df, label, item_classes,\n                                      index_to_unit_id_map, index_to_y_map,\n                                      unit_id, worker_id, comment_text_path)\n\n  # join rater error_rates\n  df_error_rates = parse_error_rates(df, error_rates, index_to_worker_id_map,\n                                     index_to_y_map, unit_id, worker_id)\n\n  # write predictions and error_rates out as CSV\n  n = len(df)\n  prediction_path = '{0}/predictions_{1}_{2}.csv'.format(\n      FLAGS.job_dir, label, n)\n  error_rates_path = '{0}/error_rates_{1}_{2}.csv'.format(\n      FLAGS.job_dir, label, n)\n\n  logging.info('Writing predictions to {}'.format(prediction_path))\n  with tf.gfile.Open(prediction_path, 'w') as fileobj:\n    df_predictions.to_csv(fileobj, index=False, encoding='utf-8')\n\n  logging.info('Writing error rates to {}'.format(error_rates_path))\n  with tf.gfile.Open(error_rates_path, 'w') as fileobj:\n    df_error_rates.to_csv(fileobj, index=False, encoding='utf-8')\n\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser()\n  parser.add_argument(\n      '--data-path',\n      help='The 
path to data to run on, local or in Cloud Storage.')\n  parser.add_argument(\n      '--comment-text-path',\n      help='The path to comment text, local or in  Cloud Storage.')\n  parser.add_argument(\n      '--worker-id-col', help='Column name of worker id.', default='_worker_id')\n  parser.add_argument(\n      '--unit-id-col', help='Column name of unit id.', default='_comment_id')\n  parser.add_argument(\n      '--n_examples',\n      help='The number of annotations to use.',\n      default=10000000,\n      type=int)\n  parser.add_argument(\n      '--label',\n      help='The label to train on, e.g. \"obscene\" or \"threat\"',\n      default='obscene')\n  parser.add_argument(\n      '--job-dir',\n      type=str,\n      default='',\n      help='The directory where the job is staged.')\n  parser.add_argument(\n      '--max-iter',\n      help='The max number of iteration to run.',\n      type=int,\n      default=25)\n  parser.add_argument(\n      '--pseudo-count',\n      help='The pseudo count to smooth error rates.',\n      type=float,\n      default=1.0)\n  parser.add_argument(\n      '--tolerance',\n      help='Stop training when variables change less than this value.',\n      type=int,\n      default=1)\n\n  FLAGS = parser.parse_args()\n\n  print('FLAGS', FLAGS)\n\n  main(FLAGS)\n"
  },
  {
    "path": "annotator_models/trainer/dawid_skene_test.py",
    "content": "\"\"\"Tests for dawid_skene.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport collections\nimport os\nimport pandas as pd\nimport tempfile\nimport unittest\n\nimport dawid_skene\n\nclass DawidSkeneTest(unittest.TestCase):\n\n  # The contents of Maximum Likelihood Estimation of Observer Error-Rates\n  # Using the EM Algorithm Table 1.\n  def setUp(self):\n    self.table_1 = pd.DataFrame.from_dict({\n        'patient':\n            range(1, 46),\n        11: [\n            1, 3, 1, 2, 2, 2, 1, 3, 2, 2, 4, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2,\n            2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 2, 4, 2, 2, 3, 1, 1, 1, 2, 1, 2\n        ],\n        12: [\n            1, 3, 1, 2, 2, 2, 2, 3, 2, 3, 4, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2,\n            2, 1, 1, 3, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2, 3, 3, 1, 1, 2, 3, 2, 2\n        ],\n        13: [\n            1, 3, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2,\n            1, 1, 1, 2, 1, 1, 2, 1, 3, 1, 2, 2, 3, 1, 2, 3, 1, 1, 1, 2, 1, 2\n        ],\n        2: [\n            1, 4, 2, 3, 3, 3, 2, 3, 2, 2, 4, 3, 1, 3, 1, 2, 1, 1, 2, 1, 2, 2, 3,\n            2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 3, 4, 2, 3, 3, 1, 1, 2, 2, 1, 2\n        ],\n        3: [\n            1, 3, 1, 1, 2, 3, 1, 4, 2, 2, 4, 3, 1, 2, 1, 1, 1, 1, 2, 3, 2, 2, 2,\n            2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 3, 2, 2, 4, 1, 1, 1, 2, 1, 2\n        ],\n        4: [\n            1, 3, 2, 2, 2, 2, 1, 3, 2, 2, 4, 4, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,\n            2, 1, 1, 2, 1, 1, 2, 1, 3, 1, 2, 3, 4, 3, 3, 3, 1, 1, 1, 2, 1, 2\n        ],\n        5: [\n            1, 4, 2, 1, 2, 2, 1, 3, 3, 3, 4, 3, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2,\n            2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2, 3, 2, 1, 1, 1, 2, 1, 2\n        ]\n    })\n\n  def test_paper_example(self):\n    with tempfile.TemporaryDirectory() as tempdirname:\n      f = 
tempfile.NamedTemporaryFile(delete=False)\n      f.file.close()\n      data = self.table_1.set_index('patient').stack().rename_axis(['patient', 'observer']).to_frame('label').reset_index()\n      data['observer'] = data['observer'].map({11:1, 12:1, 13:1, 2:2, 3:3, 4:4, 5:5})\n      data.to_csv(f.name, header=True)\n\n      Flags = collections.namedtuple('Flags', 'n_examples label unit_id_col worker_id_col comment_text_path data_path pseudo_count tolerance max_iter job_dir')\n      Flags.data_path = f.name\n      Flags.label = 'label'\n      Flags.worker_id_col = 'observer'\n      Flags.unit_id_col = 'patient'\n      Flags.n_examples = 350\n      Flags.pseudo_count = 1.0\n      Flags.comment_text_path = None\n      Flags.max_iter = 25\n      Flags.tolerance = 1\n      Flags.job_dir = tempdirname\n      dawid_skene.main(Flags)\n      os.unlink(f.name)\n      predictions = pd.read_csv(os.path.join(tempdirname, 'predictions_label_315.csv'))\n      print(predictions)\n      error_rates = pd.read_csv(os.path.join(tempdirname, 'error_rates_label_315.csv'))\n      print(error_rates)\n\n\nif __name__ == '__main__':\n  unittest.main()\n"
  },
  {
    "path": "attention-tutorial/Attention_Model_Tutorial.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"szO16q_1vXOT\"\n   },\n   \"source\": [\n    \"# Attention Based Classification Tutorial\\n\",\n    \"\\n\",\n    \"**Recommended time: 30 minutes**\\n\",\n    \"\\n\",\n    \"**Contributors: nthain, martin-gorner**\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"This tutorial provides an introduction to building text classification models in tensorflow that use attention to provide insight into how classification decisions are being made. We will build our tensorflow graph following the Embed - Encode - Attend - Predict paradigm introduced by Matthew Honnibal. For more information about this approach, you can refer to:\\n\",\n    \"\\n\",\n    \"Slides: https://goo.gl/BYT7au\\n\",\n    \"\\n\",\n    \"Video: https://youtu.be/pzOzmxCR37I\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"Figure 1 below provides a representation of the full tensorflow graph we will build in this tutorial. The green squares represent RNN cells and the blue trapezoids represent neural networks for computing attention weights which will be discussed in more detail below. We will implement each piece of this model graph in a seperate function. The whole model will then simply be calling all of these functions in turn. \\n\",\n    \"\\n\",\n    \"\\n\",\n    \"![Figure 1](img/entire_model.png \\\"Figure 1\\\")\\n\",\n    \"\\n\",\n    \"This tutorial was created in collaboration with the Tensorflow without a PhD series. 
To check out more episodes, tutorials, and codelabs from this series, please visit: \\n\",\n    \"\\n\",\n    \"https://github.com/GoogleCloudPlatform/tensorflow-without-a-phd\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"tROhMjW49Dsr\"\n   },\n   \"source\": [\n    \"### Imports\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"vSgQlcQqbWyb\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"%load_ext autoreload\\n\",\n    \"%autoreload 2\\n\",\n    \"\\n\",\n    \"from __future__ import absolute_import\\n\",\n    \"from __future__ import division\\n\",\n    \"from __future__ import print_function\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"import pandas as pd\\n\",\n    \"import tensorflow as tf\\n\",\n    \"import numpy as np\\n\",\n    \"import time\\n\",\n    \"import os\\n\",\n    \"from sklearn import metrics\\n\",\n    \"from visualize_attention import attentionDisplay\\n\",\n    \"from process_figshare import download_figshare, process_figshare\\n\",\n    \"\\n\",\n    \"tf.set_random_seed(1234)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"KKwX66FG9G-L\"\n   },\n   \"source\": [\n    \"## Load & Explore Data\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"4YFtwZsD4J7r\"\n   },\n   \"source\": [\n    \"Let's begin by downloading the data from [Figshare](https://figshare.com/articles/Wikipedia_Talk_Labels_Toxicity/4563973) and cleaning and splitting it for use in training.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   
\"outputs\": [],\n   \"source\": [\n    \"download_figshare()\\n\",\n    \"process_figshare()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"We then load these splits as pandas dataframes.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"aIy4ggIxbWyg\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"SPLITS = ['train', 'dev', 'test']\\n\",\n    \"\\n\",\n    \"wiki = {}\\n\",\n    \"for split in SPLITS:\\n\",\n    \"    wiki[split] = pd.read_csv('data/wiki_%s.csv' % split)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"_eZEM1wd5FiA\"\n   },\n   \"source\": [\n    \"We display the top few rows of the dataframe to see what we're dealing with. The key columns are 'comment' which contains the text of a comment from a Wikipedia talk page and 'toxicity' which contains the fraction of annotators who found this comment to be toxic. 
More information about the other fields and how this data was collected can be found on [this wiki](https://meta.wikimedia.org/wiki/Research:Detox/Data_Release) and [research paper](https://arxiv.org/abs/1610.08914).\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 195,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 334,\n     \"status\": \"ok\",\n     \"timestamp\": 1519755503377,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"6sj_aimNbWyn\",\n    \"outputId\": \"36fccb7e-60a3-4d1c-bbfa-03483ff49f84\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"wiki['train'].head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"p0cz2kA_9JxK\"\n   },\n   \"source\": [\n    \"### Hyperparameters\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Hyperparameters are used to specify various aspects of our model's architecture. In practice, these are often critical to model performance and are carefully tuned using some type of [hyperparameter search](https://en.wikipedia.org/wiki/Hyperparameter_optimization). 
For this tutorial, we will choose a reasonable set of hyperparameters and treat them as fixed.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"JSvJ3wwwbWys\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"hparams = {'max_document_length': 60,\\n\",\n    \"           'embedding_size': 50,\\n\",\n    \"           'rnn_cell_size': 128,\\n\",\n    \"           'batch_size': 256,\\n\",\n    \"           'attention_size': 32,\\n\",\n    \"           'attention_depth': 2}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"owTqZg2ebWyv\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"MAX_LABEL = 2\\n\",\n    \"WORDS_FEATURE = 'words'\\n\",\n    \"NUM_STEPS = 300\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Step 0: Text Preprocessing\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Before we can build a neural network on comment strings, we first have to complete a number of preprocessing steps. In particular, it is important that we \\\"tokenize\\\" the string, splitting it into an array of tokens. In our case, each token will be a word in our sentence and they will be separated by spaces and punctuation. Many alternative tokenizers exist, some of which use characters as tokens, and others which include punctuation, emojis, or even cleverly handle misspellings. \\n\",\n    \"\\n\",\n    \"Once we've tokenized the sentences, each word will be replaced with an integer representative. This will make the embedding (Step 1) much easier. 
\\n\",\n    \"\\n\",\n    \"Happily the tensorflow function [VocabularyProcessor](http://tflearn.org/data_utils/#vocabulary-processor) takes care of both the tokenization and integer mapping. We only have to give it the max_document_length argument which will determine the length of the output arrays. If sentences are shorter than this length, they will be padded and if they are longer, they will be trimmed. The VocabularyProcessor is then trained on the training set to build the initial vocabulary and map the words to integers.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"9kcrgebgbWzB\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Initialize the vocabulary processor\\n\",\n    \"vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(hparams['max_document_length'])\\n\",\n    \"\\n\",\n    \"def process_inputs(vocab_processor, df, train_label = 'train', test_label = 'test'):\\n\",\n    \"    \\n\",\n    \"    # For simplicity, we call our features x and our outputs y\\n\",\n    \"    x_train = df['train'].comment\\n\",\n    \"    y_train = df['train'].is_toxic\\n\",\n    \"    x_test = df['test'].comment\\n\",\n    \"    y_test = df['test'].is_toxic\\n\",\n    \"\\n\",\n    \"    # Train the vocab_processor from the training set\\n\",\n    \"    x_train = vocab_processor.fit_transform(x_train)\\n\",\n    \"    # Transform our test set with the vocabulary processor\\n\",\n    \"    x_test = vocab_processor.transform(x_test)\\n\",\n    \"\\n\",\n    \"    # We need these to be np.arrays instead of generators\\n\",\n    \"    x_train = np.array(list(x_train))\\n\",\n    \"    x_test = np.array(list(x_test))\\n\",\n    \"    y_train = np.array(y_train).astype(int)\\n\",\n    \"    y_test = np.array(y_test).astype(int)\\n\",\n    
\"\\n\",\n    \"    n_words = len(vocab_processor.vocabulary_)\\n\",\n    \"    print('Total words: %d' % n_words)\\n\",\n    \"\\n\",\n    \"    # Return the transformed data and the number of words\\n\",\n    \"    return x_train, y_train, x_test, y_test, n_words\\n\",\n    \"\\n\",\n    \"x_train, y_train, x_test, y_test, n_words = process_inputs(vocab_processor, wiki)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"1KtFmLmp9M0t\"\n   },\n   \"source\": [\n    \"### Step 1: Embed\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"AjtQe9eT9v4v\"\n   },\n   \"source\": [\n    \"Neural networks at their core are a composition of operators from linear algebra and non-linear activation functions. In order to perform these computations on our input sentences, we must first embed them as a vector of numbers. There are two main approaches to perform this embedding:\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"1.   **Pre-trained:** It is often beneficial to initialize our embedding matrix using pre-trained embeddings like [Word2Vec](??) or [GloVe](??). These embeddings are trained on a huge corpus of text with a general purpose problem so that they incorporate syntactic and semantic properties of the words being embedded and are amenable to transfer learning on new problems. Once initialized, you can optionally train them further for your specific problem by allowing the embedding matrix in the graph to be a trainable variable in our tensorflow graph. \\n\",\n    \"2.   
**Random:** Alternatively, embeddings can be \\\"trained from scratch\\\" by initializing the embedding matrix randomly and then training it like any other parameter in the tensorflow graph.\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"rCubiw6eUVQm\"\n   },\n   \"source\": [\n    \"In this notebook, we will be using a random initialization. To perform this embedding we use the embed_sequence function from the layers package. This will take our input features, which are the arrays of integers we produced in Step 0, and will randomly initialize a matrix to embed them into. The parameters of this matrix will then be trained with the rest of the graph.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"UG1UXX4L_KQk\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"def embed(features):\\n\",\n    \"    word_vectors = tf.contrib.layers.embed_sequence(\\n\",\n    \"        features[WORDS_FEATURE], \\n\",\n    \"        vocab_size=n_words, \\n\",\n    \"        embed_dim=hparams['embedding_size'])\\n\",\n    \"    \\n\",\n    \"    return word_vectors\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"nBp5uc-tSee2\"\n   },\n   \"source\": [\n    \"### Step 2: Encode\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"9vjxtIroTBUq\"\n   },\n   \"source\": [\n    \"A [recurrent neural network](https://en.wikipedia.org/wiki/Recurrent_neural_network) is a deep learning architecture that is useful for encoding sequential information like sentences. 
They are built around a single cell which contains one of several standard neural network architectures (e.g. simple [RNN](https://en.wikipedia.org/wiki/Recurrent_neural_network), [GRU](https://en.wikipedia.org/wiki/Gated_recurrent_unit), or [LSTM](https://en.wikipedia.org/wiki/Long_short-term_memory)). We will not focus on the details of the architectures, but at each point in time the cell takes in two inputs and produces two outputs. The inputs are the input token for that step in the sequence and some state from the previous steps in the sequence. The outputs produced are the encoded vectors for the current sequence step and a state to pass on to the next step of the sequence. \\n\",\n    \"\\n\",\n    \"Figure 2 shows what this looks like for an unrolled RNN. Each cell (represented by a green square) has two input arrows and two output arrows. Note that all of the green squares represent the same cell and share parameters. One major advantage of this cell replication is that, at inference time, it allows us to deal with arbitrary length input and not be restricted by the input sizes of our training set.\\n\",\n    \"\\n\",\n    \"![Figure 2](img/figure_2_v0.png \\\"Figure 2\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"For our model, we will use a bi-directional RNN. This is simply the concatenation of two RNNs, one which processes the sequence from left to right (the \\\"forward\\\" RNN) and one which processes it from right to left (the \\\"backward\\\" RNN). By using both directions, we get a stronger encoding as each word can be encoded using the context of its neighbors on both sides rather than just a single side.  For our cells, we use [gated recurrent units (GRUs)](https://en.wikipedia.org/wiki/Gated_recurrent_unit). 
Figure 3 gives a visual representation of this.\\n\",\n    \"\\n\",\n    \"![Figure 3](img/figure_3.png \\\"Figure 3\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"DBDS9LjdUZbV\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"def encode(word_vectors):\\n\",\n    \"    # Create a Gated Recurrent Unit cell with hidden size of RNN_SIZE.\\n\",\n    \"    # Since the forward and backward RNNs will have different parameters, we instantiate two seperate GRUS.\\n\",\n    \"    rnn_fw_cell = tf.contrib.rnn.GRUCell(hparams['rnn_cell_size'])\\n\",\n    \"    rnn_bw_cell = tf.contrib.rnn.GRUCell(hparams['rnn_cell_size'])\\n\",\n    \"    \\n\",\n    \"    # Create an unrolled Bi-Directional Recurrent Neural Networks to length of\\n\",\n    \"    # max_document_length and passes word_list as inputs for each unit.\\n\",\n    \"    outputs, _ = tf.nn.bidirectional_dynamic_rnn(rnn_fw_cell, \\n\",\n    \"                                                 rnn_bw_cell, \\n\",\n    \"                                                 word_vectors, \\n\",\n    \"                                                 dtype=tf.float32, \\n\",\n    \"                                                 time_major=False)\\n\",\n    \"    \\n\",\n    \"    return outputs\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"V8hbwTb7dXLV\"\n   },\n   \"source\": [\n    \"### Step 3: Attend\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"PMKkWgSwdZSq\"\n   },\n   \"source\": [\n    \"There are a number of ways to use the encoded states of a recurrent neural network for prediction. 
One traditional approach is to simply use the final encoded state of the network, as seen in Figure 2. However, this could lose some useful information encoded in the previous steps of the sequence. In order to keep that information, one could instead use an average of the encoded states outputted by the RNN. There is no reason to believe, though, that all of the encoded states of the RNN are equally valuable. Thus, we arrive at the idea of using a weighted sum of these encoded states to make our prediction.\\n\",\n    \"\\n\",\n    \"We will call the weights of this weighted sum \\\"attention weights\\\" as we will see below that they correspond to how important our model thinks each token of the sequence is in making a prediction decision. We compute these attention weights simply by building a small fully connected neural network on top of each encoded state. This network will have a single unit final layer which will correspond to the attention weight we will assign. As for RNNs, the parameters of this network will be the same for each step of the sequence, allowing us to accommodate variable length inputs. Figure 4 shows us what the graph would look like if we applied attention to a uni-directional RNN.\\n\",\n    \"\\n\",\n    \"![Figure 4](img/figure_4.png \\\"Figure 4\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Again, as our model uses a bi-directional RNN, we first concatenate the hidden states from each RNN before computing the attention weights and applying the weighted sum. Figure 5 below visualizes this step. 
\\n\",\n    \"\\n\",\n    \"![Figure 5](img/figure_5.png \\\"Figure 5\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"3a9fkmUOdeHh\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"def attend(inputs, attention_size, attention_depth):\\n\",\n    \"  \\n\",\n    \"  inputs = tf.concat(inputs, axis = 2)\\n\",\n    \"  \\n\",\n    \"  inputs_shape = inputs.shape\\n\",\n    \"  sequence_length = inputs_shape[1].value\\n\",\n    \"  final_layer_size = inputs_shape[2].value\\n\",\n    \"  \\n\",\n    \"  x = tf.reshape(inputs, [-1, final_layer_size])\\n\",\n    \"  for _ in range(attention_depth-1):\\n\",\n    \"    x = tf.layers.dense(x, attention_size, activation = tf.nn.relu)\\n\",\n    \"  x = tf.layers.dense(x, 1, activation = None)\\n\",\n    \"  logits = tf.reshape(x, [-1, sequence_length, 1])\\n\",\n    \"  alphas = tf.nn.softmax(logits, dim = 1)\\n\",\n    \"  \\n\",\n    \"  output = tf.reduce_sum(inputs * alphas, 1)\\n\",\n    \"\\n\",\n    \"  return output, alphas\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"bqtYZzWeoz55\"\n   },\n   \"source\": [\n    \"### Step 4: Predict\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"To generate a class prediction about whether a comment is toxic or not, the final part of our tensorflow graph takes the weighted average of hidden states generated in the attention step and uses a fully connected layer with a softmax activation function to generate probability scores for each of our prediction classes. While training, the model will use the cross-entropy loss function to train its parameters. 
\\n\",\n    \"\\n\",\n    \"As we will use the [estimator framework](https://www.tensorflow.org/get_started/custom_estimators) to train our model, we write an estimator_spec function to specify how our model is trained and what values to return during the prediction stage. We also specify the evaluation metrics of accuracy and auc, which we will use to evaluate our model in Step 7.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"L6_Wo4ixbWzI\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"def estimator_spec_for_softmax_classification(\\n\",\n    \"    logits, labels, mode, alphas):\\n\",\n    \"  \\\"\\\"\\\"Returns EstimatorSpec instance for softmax classification.\\\"\\\"\\\"\\n\",\n    \"  predicted_classes = tf.argmax(logits, 1)\\n\",\n    \"  if mode == tf.estimator.ModeKeys.PREDICT:\\n\",\n    \"    return tf.estimator.EstimatorSpec(\\n\",\n    \"        mode=mode,\\n\",\n    \"        predictions={\\n\",\n    \"            'class': predicted_classes,\\n\",\n    \"            'prob': tf.nn.softmax(logits),\\n\",\n    \"            'attention': alphas\\n\",\n    \"        })\\n\",\n    \"\\n\",\n    \"  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)\\n\",\n    \"  loss = tf.losses.softmax_cross_entropy(\\n\",\n    \"      onehot_labels=onehot_labels, logits=logits)\\n\",\n    \"  if mode == tf.estimator.ModeKeys.TRAIN:\\n\",\n    \"    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\\n\",\n    \"    train_op = optimizer.minimize(loss, \\n\",\n    \"                                  global_step=tf.train.get_global_step())\\n\",\n    \"    return tf.estimator.EstimatorSpec(mode, \\n\",\n    \"                                      loss=loss, \\n\",\n    \"                                      train_op=train_op)\\n\",\n    \"\\n\",\n    \"  
eval_metric_ops = {\\n\",\n    \"      'accuracy': tf.metrics.accuracy(\\n\",\n    \"          labels=labels, predictions=predicted_classes),\\n\",\n    \"      'auc': tf.metrics.auc(\\n\",\n    \"          labels=labels, predictions=predicted_classes),    \\n\",\n    \"  }\\n\",\n    \"  return tf.estimator.EstimatorSpec(\\n\",\n    \"      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"The predict component of our graph then just takes the output of our attention step, i.e. the weighted average of the bi-RNN hidden layers, and adds one more fully connected layer to compute the logits. These logits are fed into our estimator_spec which uses a softmax to get the final class probabilities and a [softmax_cross_entropy](https://www.tensorflow.org/api_docs/python/tf/losses/softmax_cross_entropy) to build a loss function.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def predict(encoding, labels, mode, alphas):\\n\",\n    \"    logits = tf.layers.dense(encoding, MAX_LABEL, activation=None)\\n\",\n    \"    return estimator_spec_for_softmax_classification(\\n\",\n    \"          logits=logits, labels=labels, mode=mode, alphas=alphas)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"0URRXudn9Qlg\"\n   },\n   \"source\": [\n    \"### Step 5: Complete Model Architecture\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"collapsed\": true,\n    \"id\": \"cdb9C4jNbCBj\"\n   },\n   \"source\": [\n    \"We are now ready to put it all together. 
As you can see from the bi_rnn_model function below, once you have the components for embed, encode, attend, and predict, putting the whole graph together is extremely simple!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"FcxSFa5vbWzR\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"def bi_rnn_model(features, labels, mode):\\n\",\n    \"  \\\"\\\"\\\"RNN model to predict from sequence of words to a class.\\\"\\\"\\\"\\n\",\n    \"\\n\",\n    \"  word_vectors = embed(features)\\n\",\n    \"  outputs = encode(word_vectors)\\n\",\n    \"  encoding, alphas = attend(outputs, \\n\",\n    \"                            hparams['attention_size'], \\n\",\n    \"                            hparams['attention_depth'])\\n\",\n    \"\\n\",\n    \"  return predict(encoding, labels, mode, alphas)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![Figure 1](img/entire_model.png \\\"Figure 1\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"9jZqVeWx9TVT\"\n   },\n   \"source\": [\n    \"### Step 6: Train Model\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"We will use the estimator framework to train our model. To define our classifier, we just provide it with the complete model graph (i.e. 
the bi_rnn_model function) and a directory where the models will be saved.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"HFDYpImJbWzT\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"current_time = str(int(time.time()))\\n\",\n    \"model_dir = os.path.join('checkpoints', current_time)\\n\",\n    \"classifier = tf.estimator.Estimator(model_fn=bi_rnn_model, \\n\",\n    \"                                    model_dir=model_dir)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"The estimator framework also requires us to define an input function. This will take the input data and provide it during model training in batches. We will use the provided numpy_input_function, which takes numpy arrays as features and labels. We also specify the batch size and whether we want to shuffle the data between epochs.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 34,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 153379,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758352944,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"gXJdQHe-bWzX\",\n    \"outputId\": \"353cbe80-0e36-4832-ed8e-5e6d31087ca1\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Train.\\n\",\n    \"train_input_fn = 
tf.estimator.inputs.numpy_input_fn(\\n\",\n    \"  x={WORDS_FEATURE: x_train},\\n\",\n    \"  y=y_train,\\n\",\n    \"  batch_size=hparams['batch_size'],\\n\",\n    \"  num_epochs=None,\\n\",\n    \"  shuffle=True)\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Now, it's finally time to train our model! With estimator, this is as easy as calling the train function and specifying how long we'd like to train for.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"classifier.train(input_fn=train_input_fn, \\n\",\n    \"                 steps=NUM_STEPS)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"wJQI2zW19V8j\"\n   },\n   \"source\": [\n    \"### Step 7: Predict and Evaluate Model\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"To evaluate the function, we will use it to predict the values of examples from our test set. 
Again, we define a numpy_input_fn, for the test data in this case, and then have the classifier run predictions on this input function.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"4E5poMgPbWza\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Predict.\\n\",\n    \"test_input_fn = tf.estimator.inputs.numpy_input_fn(\\n\",\n    \"  x={WORDS_FEATURE: x_test},\\n\",\n    \"  y=y_test,\\n\",\n    \"  num_epochs=1,\\n\",\n    \"  shuffle=False)\\n\",\n    \"\\n\",\n    \"predictions = classifier.predict(input_fn=test_input_fn)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"These predictions are returned to us as a generator. The code below gives an example of how we can extract the class and attention weights for each prediction.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"oTL7trjX00Zp\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"y_predicted = []\\n\",\n    \"alphas_predicted = []\\n\",\n    \"for p in predictions:\\n\",\n    \"    y_predicted.append(p['class'])\\n\",\n    \"    alphas_predicted.append(p['attention'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"To evaluate our model, we can use the evaluate function provided by estimator to get the [accuracy](https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers) and [ROC-AUC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) scores as we defined them in our estimator_spec.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   
\"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 34,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 17936,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758410784,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"jpgentt6bWzf\",\n    \"outputId\": \"ae6de3cc-9eb5-469a-e04e-958a784e9dee\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"scores = classifier.evaluate(input_fn=test_input_fn)\\n\",\n    \"print('Accuracy: {0:f}'.format(scores['accuracy']))\\n\",\n    \"print('AUC: {0:f}'.format(scores['auc']))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"lOmmwP6UV8h7\"\n   },\n   \"source\": [\n    \"### Step 8: Display Attention\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Now that we have a trained attention based toxicity model, let's use it to visualize how our model makes its classification decisions. We use the helpful attentionDisplay class from the visualize_attention package. Given any sentence, this class uses our trained classifier to determine whether the sentence is toxic and also returns a representation of the attention weights. In the arrays below, the more red a word is, the more weight classifier puts on encoded word. Try it out on some sentences of your own and see what patterns you can find!\\n\",\n    \"\\n\",\n    \"Note: If you are viewing this on Github, the colors in the cells won't display properly. 
We recommend viewing it locally or with [nbviewer](https://nbviewer.jupyter.org/) to see the correct rendering of the attention weights.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"display = attentionDisplay(vocab_processor, classifier)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1096,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758417492,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"xSpv2plUV4mN\",\n    \"outputId\": \"952a6fc6-bac4-46ab-c354-c54e5d288d75\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"Fuck off, you idiot.\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1024,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758419192,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n     
 \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"m9bsno-UV4o0\",\n    \"outputId\": \"beb38261-3e4e-4348-e62f-d23bac629268\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"Thanks for your help editing this.\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1223,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758421016,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"nB4G8rriV4wt\",\n    \"outputId\": \"2b540ca1-a03d-475a-a54a-6c22558e0be3\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"You're such an asshole. 
But thanks anyway.\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1067,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758422814,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"2L3TNl-NV4zV\",\n    \"outputId\": \"d58ba84a-c30f-4ddb-ecb5-3fc36a850bd5\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"I'm going to shoot you!\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1383,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758424819,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"r5BKahjfV41o\",\n    \"outputId\": \"05b91277-4d0a-4627-8cb9-c2275a799927\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"Oh shoot. 
Well alright.\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1154,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758426592,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"8GicGWbCV4uz\",\n    \"outputId\": \"f02500eb-35a9-466a-a759-8b83fb05feb3\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"First of all who the fuck died and made you the god.\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1061,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758428491,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"kWIR-ivlWi18\",\n    \"outputId\": \"fb25ede3-e321-4abb-e358-3a0be35266fa\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"Gosh darn it!\\\")\"\n   
]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1400,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758433415,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"MJhqEbl8WlJm\",\n    \"outputId\": \"acf96708-f04a-4493-a650-70ff8f6aa2a7\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"God damn it!\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"height\": 95,\n     \"output_extras\": [\n      {\n       \"item_id\": 1\n      },\n      {\n       \"item_id\": 2\n      }\n     ]\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 1400,\n     \"status\": \"ok\",\n     \"timestamp\": 1519758437722,\n     \"user\": {\n      \"displayName\": \"Nithum Thain\",\n      \"photoUrl\": \"//lh4.googleusercontent.com/-o8q7BcjxLpg/AAAAAAAAAAI/AAAAAAAAABQ/-zA_Kee6FY0/s50-c-k-no/photo.jpg\",\n      \"userId\": \"105288052437331023238\"\n     },\n     \"user_tz\": 210\n    },\n    \"id\": \"BDWSuL3kZCT1\",\n    \"outputId\": \"795856d9-ab5d-48aa-ceb2-46a654eec60b\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"display.display_prediction_attention(\\\"You're not that smart are you?\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   
\"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"colab\": {\n   \"collapsed_sections\": [],\n   \"default_view\": {},\n   \"last_runtime\": {\n    \"build_target\": \"//learning/brain/python/client:colab_notebook\",\n    \"kind\": \"private\"\n   },\n   \"name\": \"Attention Model Codelab.ipynb\",\n   \"provenance\": [\n    {\n     \"file_id\": \"1TEez0zxlE23RyPtPVEUaL6zhim-r8gMj\",\n     \"timestamp\": 1518199421351\n    },\n    {\n     \"file_id\": \"0By5BN4UDRuWSSHJuR2t2YVIzZjQ\",\n     \"timestamp\": 1509645017645\n    }\n   ],\n   \"version\": \"0.3.2\",\n   \"views\": {}\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.5\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 1\n}\n"
  },
  {
    "path": "attention-tutorial/README.md",
    "content": "# Attention Based Classification Tutorial\n\n**Recommended time: 30 minutes**\n\n**Contributors: nthain, martin-gorner**\n\n\nThis tutorial provides an introduction to building text classification models in Tensorflow that use attention to provide insight into how classification decisions are being made. We will build our Tensorflow graph following the Embed - Encode - Attend - Predict paradigm introduced by Matthew Honnibal. For more information about this approach, you can refer to:\n\nSlides: https://goo.gl/BYT7au\n\nVideo: https://youtu.be/pzOzmxCR37I\n\nFigure 1 below provides a representation of the full Tensorflow graph we will build in this tutorial.\n\n![Figure 1](img/entire_model.png \"Figure 1\")\n\nThis tutorial was created in collaboration with the Tensorflow without a PhD series. To check out more episodes, tutorials, and codelabs from this series, please visit: \n\nhttps://github.com/GoogleCloudPlatform/tensorflow-without-a-phd\n\n\n## To Run Locally\n\n1.  Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for\n    the project (recommended, but technically optional).\n\n    Python 3:\n\n    ```\n    python3 -m venv env\n    ```\n\n    To enter your virtual env:\n\n    ```shell\n    source env/bin/activate\n    ```\n\n2.  Install library dependencies:\n\n    ```shell\n    pip install -r requirements.txt\n    ```\n    \n"
  },
  {
    "path": "attention-tutorial/checkpoints/README.md",
    "content": "This directory stores model checkpoints during training.\n"
  },
  {
    "path": "attention-tutorial/data/README.md",
    "content": "A directory to hold our toxicity data."
  },
  {
    "path": "attention-tutorial/process_figshare.py",
    "content": "\"\"\"Cleans and splits the toxicity data from Figshare:\n\nhttps://figshare.com/articles/Wikipedia_Talk_Labels_Toxicity/4563973\n\n------------------------------------------------------------------------\n\nCopyright 2018, Google Inc.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport pandas as pd\nimport os\nimport re\nfrom urllib.request import urlretrieve\n\nDEFAULT_DATA_DIR = 'data/'\nFIGSHARE_PATH = 'https://ndownloader.figshare.com/files/'\nFIGSHARE_URL_MAPPING = {\n    'toxicity_annotations.tsv': FIGSHARE_PATH + '7394539',\n    'toxicity_annotated_comments.tsv': FIGSHARE_PATH + '7394542'\n}\n\n\ndef download_figshare(download_data_dir=DEFAULT_DATA_DIR):\n  \"\"\"\n    Downloads the toxicity data from Figshare.\n\n    Args:\n          * download_data_dir (string): if provided, the directory where the\n            Figshare tsvs should be stored\n  \"\"\"\n  if not os.path.exists(download_data_dir):\n    os.makedirs(download_data_dir)\n\n  already_exist = True\n  for file in ['toxicity_annotations.tsv', 'toxicity_annotated_comments.tsv']:\n    if not os.path.isfile(os.path.join(download_data_dir, file)):\n      already_exist = False\n      print('Downloading %s...' 
% file, end='')\n      urlretrieve(FIGSHARE_URL_MAPPING[file],\n                  os.path.join(download_data_dir, file))\n      print('Done!')\n\n  if already_exist:\n    print('Figshare data already exists.')\n    return\n\n\ndef process_figshare(input_data_dir=DEFAULT_DATA_DIR,\n                     output_data_dir=DEFAULT_DATA_DIR):\n  \"\"\"\n    Cleans and splits the toxicity data from Figshare.\n\n    Args:\n          * input_data_dir (string): if provided, the directory where the\n            Figshare tsvs are stored\n          * output_data_dir (string): if provided, the directory where the\n            output splits should be written\n  \"\"\"\n  already_exist = True\n  for split in ['train', 'test', 'dev']:\n    if not os.path.isfile(os.path.join(output_data_dir, 'wiki_%s.csv' % split)):\n      already_exist = False\n\n  if already_exist:\n    print('Processed files already exist.')\n    return\n\n  print('Processing files...', end='')\n  toxicity_annotated_comments = pd.read_csv(\n      os.path.join(input_data_dir, 'toxicity_annotated_comments.tsv'),\n      sep='\\t',\n      dtype={'rev_id': 'str'})\n  toxicity_annotations = pd.read_csv(\n      os.path.join(input_data_dir, 'toxicity_annotations.tsv'),\n      sep='\\t',\n      dtype={'rev_id': 'str'})\n\n  annotations_gped = toxicity_annotations.groupby(\n      'rev_id', as_index=False).agg({'toxicity': 'mean'})\n  all_data = pd.merge(\n      annotations_gped, toxicity_annotated_comments, on='rev_id')\n\n  all_data['comment'] = all_data['comment'].apply(lambda x: re.sub(\n      'NEWLINE_TOKEN|TAB_TOKEN', ' ', x))\n\n  all_data['is_toxic'] = all_data['toxicity'] > 0.5\n\n  # split into train, valid, test\n  wiki_splits = {}\n  for split in ['train', 'test', 'dev']:\n    wiki_splits[split] = all_data.query('split == @split')\n\n  for split in wiki_splits:\n    wiki_splits[split].to_csv(\n        os.path.join(output_data_dir, 'wiki_%s.csv' % split), index=False)\n  print('Done!')\n\n\n# TODO(nthain): Add 
input and output dirs as flags.\nif __name__ == '__main__':\n  process_figshare()\n"
  },
  {
    "path": "attention-tutorial/requirements.txt",
    "content": "absl-py==0.1.9\nappnope==0.1.0\nbleach==3.3.0\ncertifi==2024.7.4\nchardet==3.0.4\ncomet-ml==1.0.8\ndecorator==4.2.1\nentrypoints==0.2.3\nenum34==1.1.6\nfutures==3.1.1\nh5py==2.7.1\nhtml5lib==0.999999999\nidna==3.7\nipykernel==4.8.2\nipython==8.10.0\nipython-genutils==0.2.0\nipywidgets==7.1.2\njedi==0.11.1\nJinja2==3.1.4\njsonschema==2.6.0\njupyter==1.0.0\njupyter-client==5.2.3\njupyter-console==5.2.0\njupyter-core==4.11.2\nkaggle==1.0.5\nKeras==2.13.1\nMarkdown==2.6.11\nMarkupSafe==1.0\nmistune==2.0.3\nnbconvert==6.5.1\nnbformat==4.4.0\nnltk==3.9\nnotebook==6.4.12\nnumpy==1.22.0\npandas==0.22.0\npandocfilters==1.4.2\nparso==0.1.1\npexpect==4.4.0\npickleshare==0.7.4\nPillow==10.3.0\nprompt-toolkit==1.0.15\nprotobuf==3.18.3\nptyprocess==0.5.2\nPygments==2.15.0\npython-dateutil==2.6.1\npytz==2017.3\nPyYAML==5.4\npyzmq==17.0.0\nqtconsole==4.3.1\nrequests==2.32.2\nscikit-learn==0.19.1\nscipy==1.10.0\nSend2Trash==1.5.0\nsimplegeneric==0.8.1\nsix==1.11.0\nsklearn==0.0\ntensorflow==2.12.1\ntensorflow-tensorboard==1.5.0\nterminado==0.8.1\ntestpath==0.3.1\ntflearn==0.3.2\ntornado==6.4.1\ntraitlets==4.3.2\nurllib3==1.26.18\nwcwidth==0.1.7\nwebencodings==0.5.1\nwebsocket-client==0.47.0\nWerkzeug==3.0.6\nwidgetsnbextension==3.1.4\nwurlitzer==1.0.1\n"
  },
  {
    "path": "attention-tutorial/visualize_attention.py",
    "content": "\"\"\"A class to help visualize attention weights.\n\n------------------------------------------------------------------------\n\nCopyright 2018, Google Inc.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport pandas as pd\nimport tensorflow as tf\nimport numpy as np\n\npd.set_option('max_columns', 100)\ntokenizer = tf.contrib.learn.preprocessing.tokenizer\nWORDS_FEATURE = 'words'\nMAX_DOCUMENT_LENGTH = 60\n\n\nclass wordVal(object):\n  \"\"\"A helper class that represents a word and value simultaneously.\"\"\"\n\n  def __init__(self, word, val):\n    self.word = word\n    self.val = val\n\n  def __str__(self):\n    return self.word\n\n\nclass attentionDisplay(object):\n  \"\"\"A class to visualize attention weights produced by a classifer on a given string.\"\"\"\n\n  def __init__(self, vocab_processor, classifier, words_feature='words'):\n    \"\"\"\n        Args:\n          * vocab_processor: a trained vocabulary processor from\n            tf.contrib.learn.preprocessing.VocabularyProcessor\n          * classifier: the classifier of class Estimator produced in\n            Attention_Model_Codelab.ipynb\n          * words_feature (string): if provided, the key for the comments in the\n            feed dictionary expected by the classifier\n    \"\"\"\n\n    self.vocab_processor = vocab_processor\n    self.classifier = classifier\n    
self.words_feature = words_feature\n\n  def _rgb_to_hex(self, rgb):\n    return '#%02x%02x%02x' % rgb\n\n  def _color_wordvals(self, s):\n    r = 255 - int(s.val * 255)\n    color = self._rgb_to_hex((255, r, r))\n    return 'background-color: %s' % color\n\n  def _predict_sentence(self, input_string):\n    x_test = self.vocab_processor.transform([input_string])\n    x_test = np.array(list(x_test))\n\n    test_input_fn = tf.estimator.inputs.numpy_input_fn(\n        x={self.words_feature: x_test}, num_epochs=1, shuffle=False)\n\n    predictions = self.classifier.predict(input_fn=test_input_fn)\n    y_predicted = []\n    alphas_predicted = []\n    for p in predictions:\n      y_predicted.append(p['class'])\n      alphas_predicted.append(p['attention'])\n    return y_predicted, alphas_predicted\n\n  def _resize_and_tokenize(self, input_string):\n    tokenized_sentence = list(tokenizer([input_string]))[0]\n    tokenized_sentence = tokenized_sentence + [''] * (\n        MAX_DOCUMENT_LENGTH - len(tokenized_sentence))\n    tokenized_sentence = tokenized_sentence[:MAX_DOCUMENT_LENGTH]\n    return tokenized_sentence\n\n  def display_prediction_attention(self, input_string):\n    \"\"\"Visualizes the attention weights of the initialized classifier on the given string.\"\"\"\n    pred, attn = self._predict_sentence(input_string)\n    if pred[0]:\n      print('Toxic')\n    else:\n      print('Not toxic')\n    tokenized_string = self._resize_and_tokenize(input_string)\n    wordvals = [wordVal(w, v) for w, v in zip(tokenized_string, attn[0])]\n    word_df = pd.DataFrame(wordvals).transpose()\n    return word_df.style.applymap(self._color_wordvals)\n"
  },
  {
    "path": "data_preparation/README.md",
    "content": "# Dataset preparation\n\nThis directory contains some steps to prepare our data before training our ML models. In particular, we want to:\n * Shuffle the data and split it into train, eval and test datasets.\n * Create an artificial bias (female vs male) for our embedding experiments. This is done by modifying the toxicity rate for examples labeled as 'male'.\n\n\n## Environment Setup\n\n### Python Dependencies\n\nInstall library dependencies (it is optional, but recommended to install these\nin a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html):\n\n    ```shell\n    # The python2 way to create and use virtual environment\n    # (optional, but recommended):\n    virtualenv .pyenv\n    source .pyenv/bin/activate\n    # Install dependencies\n    pip install -r requirements.txt\n\n    jupyter notebook\n\n    # ... do stuff ...\n\n    # Exit your virtual environment.\n    deactivate\n    ```\n\n\n### Execution flow\n\n\n#### Splits the data locally\n\nWe recommend using a small dataset 'train_small.tfrecord'.\n\n  ```shell\n  NOW=$(date +%Y%m%d%H%M%S)\n  JOB_NAME=data-preparation-$NOW\n\n  python run_preprocessing_data_split.py \\\n    --job_dir 'local_data' \\\n    --input_data_path 'local_data/train_small.tfrecord' \\\n    --output_folder 'local_data/train_eval_test/'\n  ```\n\n#### Splits the data on the cloud\n\n  ```shell\n  NOW=$(date +%Y%m%d%H%M%S)\n  JOB_NAME=data-preparation-$NOW\n\n  python run_preprocessing_data_split.py \\\n    --job_name $JOB_NAME \\\n    --job_dir gs://kaggle-model-experiments/dataflow/$JOB_NAME \\\n    --input_data_path 'gs://kaggle-model-experiments/resources/civil_comments_data/train.tfrecord' \\\n    --output_folder 'gs://kaggle-model-experiments/resources/civil_comments_data/train_eval_test' \\\n    --cloud\n  ```\n\n#### Creates the artificial_bias locally\n\n```shell\n  NOW=$(date +%Y%m%d%H%M%S)\n  JOB_NAME=data-preparation-$NOW\n\n  python run_preprocessing_artificial_bias.py \\\n    --job_dir 
'local_data' \\\n    --input_data_path 'local_data/train_eval_test/train*.tfrecord' \\\n    --output_folder 'local_data/artificial_bias'\n  ```\n\n#### Creates the artificial_bias on the cloud\n\n```shell\n  NOW=$(date +%Y%m%d%H%M%S)\n  JOB_NAME=data-preparation-$NOW\n  python run_preprocessing_artificial_bias.py \\\n    --job_name $JOB_NAME \\\n    --job_dir gs://kaggle-model-experiments/dataflow/$JOB_NAME \\\n    --input_data_path 'gs://kaggle-model-experiments/resources/civil_comments_data/train_eval_test/train*.tfrecord' \\\n    --output_folder gs://kaggle-model-experiments/resources/civil_comments_data/artificial_bias/${USER}/${NOW} \\\n    --cloud\n  ```\n"
  },
  {
    "path": "data_preparation/config.ini",
    "content": "[CLOUD]\nproject = wikidetox\nrunner = DataflowRunner\nmax_num_workers = 50\ndefaultWorkerLogLevel = INFO\nlog_level = ERROR\nzone = us-east1-b\n\n[LOCAL]\nproject = wikidetox\nrunner = DirectRunner\ndefaultWorkerLogLevel=INFO\nlog_level = ERROR\n"
  },
  {
    "path": "data_preparation/preprocessing/__init__.py",
    "content": ""
  },
  {
    "path": "data_preparation/preprocessing/constants.py",
    "content": "\"\"\"Constants for preprocessing.\"\"\"\n\nTRAIN_DATA_PREFIX = 'train'\nEVAL_DATA_PREFIX = 'eval'\nTEST_DATA_PREFIX = 'test'\nTRAIN_ARTIFICIAL_BIAS_PREFIX = 'train_artificial_bias'\n"
  },
  {
    "path": "data_preparation/preprocessing/preprocessing.py",
    "content": "\"\"\"Preprocessing steps of the data preparation.\"\"\"\n\nimport os\nimport random\n\nimport apache_beam as beam\nimport tensorflow as tf\nfrom tensorflow_transform import coders\n\nimport constants\nimport tfrecord_utils\n\n\ndef get_identity_list():\n  return [\n      'male', 'female', 'transgender', 'other_gender', 'heterosexual',\n      'homosexual_gay_or_lesbian', 'bisexual', 'other_sexual_orientation',\n      'christian', 'jewish', 'muslim', 'hindu', 'buddhist', 'atheist',\n      'other_religion', 'black', 'white', 'asian', 'latino',\n      'other_race_or_ethnicity', 'physical_disability',\n      'intellectual_or_learning_disability', 'psychiatric_or_mental_illness',\n      'other_disability'\n  ]\n\n\ndef get_civil_comments_spec(include_identity_terms=True):\n  \"\"\"Returns the spec of the civil_comments dataset.\"\"\"\n  spec = {\n      'comment_text': tf.FixedLenFeature([], dtype=tf.string),\n      'id': tf.FixedLenFeature([], dtype=tf.string),\n      'toxicity': tf.FixedLenFeature([], dtype=tf.float32),\n      'severe_toxicity': tf.FixedLenFeature([], dtype=tf.float32),\n      'obscene': tf.FixedLenFeature([], dtype=tf.float32),\n      'sexual_explicit': tf.FixedLenFeature([], dtype=tf.float32),\n      'identity_attack': tf.FixedLenFeature([], dtype=tf.float32),\n      'insult': tf.FixedLenFeature([], dtype=tf.float32),\n      'threat': tf.FixedLenFeature([], dtype=tf.float32),\n      'toxicity_annotator_count': tf.FixedLenFeature([], dtype=tf.int64),\n      'identity_annotator_count': tf.FixedLenFeature([], dtype=tf.int64),\n  }\n  if include_identity_terms:\n    for identity in get_identity_list():\n      spec[identity] = tf.FixedLenFeature([],\n                                          dtype=tf.float32,\n                                          default_value=-1.0)\n  return spec\n\n\ndef split_data(examples, train_fraction, eval_fraction):\n  \"\"\"Splits the data into train/eval/test.\"\"\"\n\n  def partition_fn(data, 
n_partition):\n    random_value = random.random()\n    if random_value < train_fraction:\n      return 0\n    if random_value < train_fraction + eval_fraction:\n      return 1\n    return 2\n\n  examples_split = (examples | 'SplitData' >> beam.Partition(partition_fn, 3))\n  return examples_split\n\n\n@beam.ptransform_fn\ndef Shuffle(examples):  # pylint: disable=invalid-name\n  return (examples\n          | 'PairWithRandom' >> beam.Map(lambda x: (random.random(), x))\n          | 'GroupByRandom' >> beam.GroupByKey()\n          | 'DropRandom' >> beam.FlatMap(lambda (k, vs): vs))\n\n\ndef write_to_tf_records(examples, output_path):\n  \"\"\"Shuffles and writes to disk.\"\"\"\n\n  output_path_prefix = os.path.basename(output_path)\n  shuff_ex = (examples | 'Shuffle_' + output_path_prefix >> Shuffle())\n  _ = (\n      shuff_ex\n      | 'Serialize_' + output_path_prefix >> beam.ParDo(\n          tfrecord_utils.EncodeTFRecord(\n              feature_spec=get_civil_comments_spec(),\n              optional_field_names=get_identity_list()))\n      | 'WriteToTF_' + output_path_prefix >> beam.io.WriteToTFRecord(\n          file_path_prefix=output_path, file_name_suffix='.tfrecord'))\n\n\nclass OversampleExample(beam.DoFn):\n  \"\"\"Oversamples examples from a given class.\"\"\"\n\n  def __init__(self, rule_fn, oversample_rate):\n    if (oversample_rate <= 0) or not isinstance(oversample_rate, int):\n      raise ValueError('oversample_rate should be a positive integer.')\n    self._rule_fn = rule_fn\n    self._oversample_rate = oversample_rate\n\n  def process(self, element):\n    if self._rule_fn(element):\n      for _ in range(self._oversample_rate):\n        yield element\n    else:\n      yield element\n\n\ndef _select_male_toxic_example(example,\n                               threshold_identity=0.5,\n                               threshold_toxic=0.5):\n  is_toxic = example['toxicity'] >= threshold_toxic\n  if 'male' in example:\n    is_male = example['male'] >= 
threshold_identity\n  else:\n    is_male = False\n  return is_toxic and is_male\n\n\ndef run_data_split(p, input_data_path, train_fraction, eval_fraction,\n                   output_folder):\n  \"\"\"Splits the data into train/eval/test.\n\n  Args:\n    p: Beam pipeline for constructing PCollections and applying PTransforms.\n    input_data_path: Input TF Records.\n    train_fraction: Fraction of the data to be allocated to the training set.\n    eval_fraction: Fraction of the data to be allocated to the eval set.\n    output_folder: Folder to save the train/eval/test datasets.\n\n  Raises:\n    ValueError:\n        If train_fraction + eval_fraction >= 1.\n        If the output_directory exists. This exception prevents the user\n            from overwriting a previous split.\n  \"\"\"\n\n  if (train_fraction + eval_fraction >= 1.):\n    raise ValueError('Train and eval fraction are incompatible.')\n  if tf.gfile.Exists(output_folder):\n    raise ValueError('Output directory should be empty.'\n                     ' You should select a different path.')\n\n  examples = (\n      p\n      | 'ReadExamples' >>\n      beam.io.tfrecordio.ReadFromTFRecord(file_pattern=input_data_path))\n  examples = (\n      examples\n      | 'DecodeTFRecord' >> beam.ParDo(\n          tfrecord_utils.DecodeTFRecord(\n              feature_spec=get_civil_comments_spec(),\n              optional_field_names=get_identity_list())))\n\n  split = split_data(examples, train_fraction, eval_fraction)\n  train_data = split[0]\n  eval_data = split[1]\n  test_data = split[2]\n\n  write_to_tf_records(train_data,\n                      os.path.join(output_folder, constants.TRAIN_DATA_PREFIX))\n  write_to_tf_records(eval_data,\n                      os.path.join(output_folder, constants.EVAL_DATA_PREFIX))\n  write_to_tf_records(test_data,\n                      os.path.join(output_folder, constants.TEST_DATA_PREFIX))\n\n\ndef run_artificial_bias(p, train_input_data_path, output_folder,\n                   
     oversample_rate):\n  \"\"\"Main function to create artificial bias.\n\n  Args:\n    p: Beam pipeline for constructing PCollections and applying PTransforms.\n    train_input_data_path: Input TF Records, which is typically the training\n      dataset. This artificial bias method should not be run on eval/test.\n    output_folder: Folder to save the train/eval/test datasets.\n    oversample_rate: How many times to oversample the targeted class.\n  \"\"\"\n\n  train_data = (\n      p\n      | 'ReadExamples' >>\n      beam.io.tfrecordio.ReadFromTFRecord(file_pattern=train_input_data_path)\n      | 'DecodeTFRecord' >> beam.ParDo(\n          tfrecord_utils.DecodeTFRecord(\n              feature_spec=get_civil_comments_spec(),\n              optional_field_names=get_identity_list())))\n\n  train_data_artificially_biased = (\n      train_data\n      | 'CreateBias' >> beam.ParDo(\n          OversampleExample(_select_male_toxic_example, oversample_rate)))\n\n  write_to_tf_records(\n      train_data_artificially_biased,\n      os.path.join(output_folder, constants.TRAIN_ARTIFICIAL_BIAS_PREFIX))\n"
  },
  {
    "path": "data_preparation/preprocessing/tfrecord_utils.py",
    "content": "\"\"\"Utilities to decode and encode TF Records.\n\nThese utilities are wrappers around TF-Tranform coders to handle the\n    specificities around optional fields.\n\"\"\"\n\nimport apache_beam as beam\nfrom tensorflow_transform import coders\n\n\nclass Schema(object):\n  \"\"\"Defines the dataset schema for tf-transform.\n\n  We should have used dataset_schema from tensorflow_transform.tf_metadata.\n      However, there is a lack of support for `FixedLenFeature` default value,\n      and an exception is triggered by _feature_from_feature_spec.\n  TODO(fprost): Submit internal bug here.\n  \"\"\"\n\n  def __init__(self, spec):\n    self._spec = spec\n\n  def as_feature_spec(self):\n    return self._spec\n\n\nclass DecodeTFRecord(beam.DoFn):\n  \"\"\"Wrapper around ExampleProtoCoder for decoding optional fields.\n\n  To decode a TF-Record example, we use the  coder utility\n    'tensorflow_transform.codersExampleProtoCoder'. For optional fields,\n    (indicated by 'default_value' argument for `FixedLenFeature`), the coder\n    will generate the default value when the optional field is missing.\n  This wrapper post-processes the coder and removes the field if the default\n      value was used.\n  \"\"\"\n\n  def __init__(self,\n               feature_spec,\n               optional_field_names,\n               rule_optional_fn=lambda x: x < 0):\n    \"\"\"Initialises a TF-Record decoder.\n\n    Args:\n      feature_spec: Dictionary from feature names to one of `FixedLenFeature`,\n        `SparseFeature` or `VarLenFeature. It contains all the features to parse\n        (including optional ones).\n      optional_field_names: list of optional fields.\n      rule_optional_fn: function that take the value of an optional field and\n        returns True if the value is indicative of a default value (e.g.\n        resulting from the default value of parsing FixedLenFeature).  
Current\n        code requires that all optional_field_names share the rule_optional_fn.\n    \"\"\"\n    self._schema = Schema(feature_spec)\n    self._coder = coders.ExampleProtoCoder(self._schema)\n    self._optional_field_names = optional_field_names\n    self._rule_optional_fn = rule_optional_fn\n\n  def process(self, element):\n    parsed_element = self._coder.decode(element)\n    for identity in self._optional_field_names:\n      if self._rule_optional_fn(parsed_element[identity]):\n        del parsed_element[identity]\n    yield parsed_element\n\n\nclass EncodeTFRecord(beam.DoFn):\n  \"\"\"Wrapper around ExampleProtoCoder for encoding optional fields.\"\"\"\n\n  def __init__(self, feature_spec, optional_field_names):\n    \"\"\"Initialises a TF-Record encoder.\n\n    Args:\n      feature_spec: Dictionary from feature names to one of `FixedLenFeature`,\n        `SparseFeature` or `VarLenFeature. It contains all the features to parse\n        (including optional ones).\n      optional_field_names: list of optional fields.\n    \"\"\"\n    self._feature_spec = feature_spec\n    self._optional_field_names = optional_field_names\n\n  def process(self, element):\n    element_spec = self._feature_spec.copy()\n    for identity in self._optional_field_names:\n      if identity not in element:\n        del element_spec[identity]\n    element_schema = Schema(element_spec)\n    coder = coders.ExampleProtoCoder(element_schema)\n    encoded_element = coder.encode(element)\n    yield encoded_element\n"
  },
  {
    "path": "data_preparation/requirements.txt",
    "content": "apache-beam[gcp]==2.2.0\nconfigparser==3.5.0\ntensorflow==2.12.1\ntensorflow_transform==0.9\n"
  },
  {
    "path": "data_preparation/run_preprocessing_artificial_bias.py",
    "content": "\"\"\"Sets up and start the Dataflow job for data preparation.\"\"\"\n\nimport argparse\nimport logging\nimport os\nimport sys\n\nimport apache_beam as beam\nimport configparser\nfrom preprocessing import preprocessing\n\n\ndef _parse_arguments(argv):\n  \"\"\"Parses command line arguments.\"\"\"\n  parser = argparse.ArgumentParser(\n      description='Runs Preprocessing on Civil comments data.')\n  parser.add_argument(\n      '--cloud', action='store_true', help='Run preprocessing on the cloud.')\n  parser.add_argument('--job_name', required=False, help='Dataflow job name')\n  parser.add_argument(\n      '--job_dir',\n      required=True,\n      help='Directory in which to stage code and write temporary outputs')\n  parser.add_argument(\n      '--output_folder',\n      required=True,\n      help='Directory where to write train, eval and test data')\n  parser.add_argument('--input_data_path')\n  parser.add_argument(\n      '--oversample_rate',\n      required=False,\n      default=5,\n      type=int,\n      help='How many times to oversample the targeted class')\n  args = parser.parse_args(args=argv[1:])\n  return args\n\n\ndef _set_logging(log_level):\n  logging.getLogger().setLevel(getattr(logging, log_level.upper()))\n\n\ndef _parse_config(env, config_file_path):\n  \"\"\"Parses configuration file.\n\n  Args:\n    env: The environment in which the preprocessing job will be run.\n    config_file_path: Path to the configuration file to be parsed.\n\n  Returns:\n    A dictionary containing the parsed runtime config.\n  \"\"\"\n  config = configparser.ConfigParser()\n  config.read(config_file_path)\n  return dict(config.items(env))\n\n\ndef main():\n  \"\"\"Configures pipeline and spawns preprocessing job.\"\"\"\n  args = _parse_arguments(sys.argv)\n  config = _parse_config('CLOUD' if args.cloud else 'LOCAL', 'config.ini')\n  options = {'project': str(config.get('project'))}\n  if args.cloud:\n    if not args.job_name:\n      raise ValueError('Job 
name must be specified for cloud runs.')\n    options.update({\n        'job_name':\n            args.job_name,\n        'max_num_workers':\n            int(config.get('max_num_workers')),\n        'setup_file':\n            os.path.abspath(\n                os.path.join(os.path.dirname(__file__), 'setup.py')),\n        'staging_location':\n            os.path.join(args.job_dir, 'staging'),\n        'temp_location':\n            os.path.join(args.job_dir, 'tmp'),\n        'zone':\n            config.get('zone')\n    })\n\n  pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)\n  _set_logging(config.get('log_level'))\n  with beam.Pipeline(\n      str(config.get('runner')), options=pipeline_options) as pipeline:\n    preprocessing.run_artificial_bias(\n        pipeline,\n        train_input_data_path=args.input_data_path,\n        output_folder=args.output_folder,\n        oversample_rate=args.oversample_rate)\n\n\nif __name__ == '__main__':\n  main()\n"
  },
  {
    "path": "data_preparation/run_preprocessing_data_split.py",
    "content": "\"\"\"Sets up and start the Dataflow job for data preparation.\"\"\"\n\nimport argparse\nimport logging\nimport os\nimport sys\n\nimport apache_beam as beam\nimport configparser\nfrom preprocessing import preprocessing\n\n\ndef _parse_arguments(argv):\n  \"\"\"Parses command line arguments.\"\"\"\n  parser = argparse.ArgumentParser(\n      description='Runs Preprocessing on Civil comments data.')\n  parser.add_argument(\n      '--cloud', action='store_true', help='Run preprocessing on the cloud.')\n  parser.add_argument('--job_name', required=False, help='Dataflow job name')\n  parser.add_argument(\n      '--job_dir',\n      required=True,\n      help='Directory in which to stage code and write temporary outputs')\n  parser.add_argument(\n      '--output_folder',\n      required=True,\n      help='Directory where to write train, eval and test data')\n  parser.add_argument('--input_data_path')\n  parser.add_argument(\n      '--train_fraction',\n      required=False,\n      default=0.7,\n      type=float,\n      help='The fraction of the data to allocate to the training dataset')\n  parser.add_argument(\n      '--eval_fraction',\n      required=False,\n      default=0.15,\n      type=float,\n      help='The fraction of the data to allocate to the eval dataset')\n  args = parser.parse_args(args=argv[1:])\n  return args\n\n\ndef _set_logging(log_level):\n  logging.getLogger().setLevel(getattr(logging, log_level.upper()))\n\n\ndef _parse_config(env, config_file_path):\n  \"\"\"Parses configuration file.\n\n  Args:\n    env: The environment in which the preprocessing job will be run.\n    config_file_path: Path to the configuration file to be parsed.\n\n  Returns:\n    A dictionary containing the parsed runtime config.\n  \"\"\"\n  config = configparser.ConfigParser()\n  config.read(config_file_path)\n  return dict(config.items(env))\n\n\ndef main():\n  \"\"\"Configures pipeline and spawns preprocessing job.\"\"\"\n  args = _parse_arguments(sys.argv)\n  
config = _parse_config('CLOUD' if args.cloud else 'LOCAL', 'config.ini')\n  options = {'project': str(config.get('project'))}\n  if args.cloud:\n    if not args.job_name:\n      raise ValueError('Job name must be specified for cloud runs.')\n    options.update({\n        'job_name':\n            args.job_name,\n        'max_num_workers':\n            int(config.get('max_num_workers')),\n        'setup_file':\n            os.path.abspath(\n                os.path.join(os.path.dirname(__file__), 'setup.py')),\n        'staging_location':\n            os.path.join(args.job_dir, 'staging'),\n        'temp_location':\n            os.path.join(args.job_dir, 'tmp'),\n        'zone':\n            config.get('zone')\n    })\n\n  pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)\n  _set_logging(config.get('log_level'))\n  with beam.Pipeline(\n      str(config.get('runner')), options=pipeline_options) as pipeline:\n    preprocessing.run_data_split(\n        pipeline,\n        input_data_path=args.input_data_path,\n        train_fraction=args.train_fraction,\n        eval_fraction=args.eval_fraction,\n        output_folder=args.output_folder)\n\n\nif __name__ == '__main__':\n  main()\n"
  },
  {
    "path": "data_preparation/setup.py",
    "content": "from setuptools import setup, find_packages\n\nNAME = 'jigsaw'\nVERSION = '1.0'\nREQUIRED_PACKAGES = ['tensorflow-transform==0.9.0']\n\nsetup(\n    name=NAME,\n    version=VERSION,\n    packages=find_packages(),\n    install_requires=REQUIRED_PACKAGES,\n)\n"
  },
  {
    "path": "experiments/.gitignore",
    "content": "# Ignore local data, e.g. copies of embeddings\nlocal_data\n\n# Ignore local tmp files and directories\ntmp\n\n# Local config that holds cloud/comet.ml settings.\ntf_trainer/convai_config.py\n"
  },
  {
    "path": "experiments/README.md",
    "content": "# Text Classification Framework\n\nThis directory contains an ML framework for text classification. We illustrate\nit with toxic (and other attributes) comment classification.\n\nThe framework is structured as a series of common files and templates to quickly\nconstruct models on top of the [Keras](https://keras.io/) or the [TensorFlow\nEstimator API](https://www.tensorflow.org/programmers_guide/estimators).\n\nThe templates also demonstrate how these models can be trained using [Google ML\nEngine](https://cloud.google.com/ml-engine/).\n\n\n## Environment Setup\n\n### Build Tools/Bazel Dependencies\n\nInstall [Bazel](https://docs.bazel.build/versions/master/install-os-x.html);\nthis is the build tool we use to run tests, etc.\n\n### Python Dependencies\n\nInstall library dependencies (it is optional, but recommended to install these\nin a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html):\n\n```shell\n# The python3 way to create and use virtual environment\n# (optional, but recommended):\npython3 -m venv .pyenv\nsource .pyenv/bin/activate\n# Install dependencies\npip install -r requirements.txt\n\n# ... do stuff ...\n\n# Exit your virtual environment.\ndeactivate\n```\n\n### Cloud and ML Engine configuration\n\n1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/).\n2. Log in:\n```shell\ngcloud auth login\n```\nYou will be prompted to visit a page in the browser; follow the login instructions there.\n\nDue to [some issues](https://stackoverflow.com/questions/44401088/using-training-tfrecords-that-are-stored-on-google-cloud), also run this command:\n\n```shell\ngcloud auth application-default login\n```\nFollow the instructions there as well.\n\n3. Set the project:\n```shell\ngcloud config set project [PROJECT]\n```\n\n4. Verify that the above setup works:\n```shell\ngcloud ml-engine models list\n```\n\nYou should see some existing models. 
Example output:\n```shell\nNAME                                DEFAULT_VERSION_NAME\nkaggle_model                        v_20180627_173451\n...\n```\n\n## Training an Existing Model\n\nTo train an existing model, execute either command:\n * `./tf_trainer/MODEL_NAME/run.local.sh` to run training locally, or\n * `./tf_trainer/MODEL_NAME/run.ml_engine.sh` to run training on [Google ML\nEngine](https://cloud.google.com/ml-engine/).\n\nThese scripts assume that you have access to the resources on our cloud\nprojects. If you don't, you can still run the models locally, but will have to\nmodify the data paths in `run.local.sh`. At the moment, we only support reading\ndata in `tf.record` format. See\n[`tools/convert_csv_to_tfrecord.py`](https://github.com/conversationai/conversationai-models/blob/master/experiments/tools/convert_csv_to_tfrecord.py)\nfor a simple CSV to `tf.record` converter.\n\n\n## Running a hyper parameter tuning job\n\nTo run a hyper parameter tuning job on CMLE, execute the following command:\n * `./tf_trainer/MODEL_NAME/run.hyperparameter.sh`.\n\nThe hyperparameter configuration (MODEL_NAME/hparam_config.yaml) describes the job configuration, the parameters to tune and their respective range.\n\nYou can monitor your progress in the CMLE UI.\n\n\n## Deploying a trained model on CMLE\n\nAt the end of your training, the model will be saved as a .pb file. Note: this is currently broken for keras models. 
TODO(fprost): Update this.\n\nYou can then deploy this model on CMLE by executing the following command:\n * `./tf_trainer/MODEL_NAME/run.deploy.sh`.\n\nThe model will be accessible as an API and available for [batch/online predictions](https://cloud.google.com/ml-engine/docs/tensorflow/batch-predict).\nFurther information can be found [here](https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models) about deploying models on CMLE.\n\n## Deploying several models on CMLE for a given training run\n\nThe argument `n_export` allows you to save several models during your training run (1 model every train_steps/n).\nAll of the .pb filed will be saved in a subfolder of your MODEL_DIR.\n\nThere is a convenient utility in model_evaluation to help you to deploy all models on CMLE:\n * `python utils_export/deploy_continous_model.py --parent_dir MODEL_DIR --model_name MODEL_NAME `\n\n\n## Evaluate an Existing Model on New Data\n\nSee `model_evaluation/` for further information.\n\n\n### Type Checking\n\nCheck the typings:\n\n```shell\nmypy --ignore-missing-imports -p tf_trainer\n```\n\nIt's recommended you use mypy as an additional linter in your editor.\n\n### Testing\n\nRun all the tests and see the output streamed:\n\n```shell\nbazel test --test_output=streamed ...\n```\n\nYou can also run tests individually, directly with python like so:\n\n```shell\npython -m tf_trainer.common.tfrecord_input_test\npython -m tf_trainer.common.base_keras_model_test\n```\n\n### Building a New Model\n\nTODO(jjtan)\n"
  },
  {
    "path": "experiments/WORKSPACE",
    "content": "# Bazel Workspace File.\n"
  },
  {
    "path": "experiments/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/requirements.txt",
    "content": "absl-py==0.7.0\nastor==0.7.1\nbert-tensorflow==1.0.1\nbleach==3.3.0\ncertifi==2024.7.4\nchardet==3.0.4\ngast==0.2.2\ngcsfs==0.2.3\ngrpcio==1.53.2\nh5py==2.9.0\nhtml5lib==1.0.1\nidna==3.7\njsonlines==1.2.0\nMarkdown==3.0.1\nmypy==0.670\nnltk==3.9\nnumpy==1.22.0\npandas==0.24.1\nprotobuf==3.18.3\nPyYAML==5.4\nrequests==2.32.2\nscipy==1.10.0\nsentencepiece==0.1.8\nsix==1.12.0\ntensorboard==1.12.2\ntensorflow==2.12.1\ntensorflow-hub==0.2.0\ntermcolor==1.1.0\ntf-sentencepiece==0.1.8\ntyped-ast==1.3.2\nurllib3==1.26.19\nwebsocket-client==0.54.0\nWerkzeug==3.0.3\nwurlitzer==1.0.2\n"
  },
  {
    "path": "experiments/setup.py",
    "content": "from setuptools import find_packages\nfrom setuptools import setup\n\nREQUIRED_PACKAGES = [\n    'nltk>=3.3',\n    'typed_ast==1.3.2',\n    'tensorflow-hub==0.1.1',\n    'bert-tensorflow==1.0.1'\n]\n\nsetup(\n    name='tf_trainer',\n    version='0.1',\n    install_requires=REQUIRED_PACKAGES,\n    packages=find_packages(),\n    include_package_data=True,\n    description='TF Estimator modelling framework.')\n"
  },
  {
    "path": "experiments/testdata/BUILD",
    "content": "exports_files([\n  \"cats_and_dogs_onehot.vocab.txt\",\n  \"cats_and_dogs_with_cat_opt_int_labels.jsonl\",\n  \"cats_and_dogs_with_partial_cat_int_labels.jsonl\",\n  \"cats_and_dogs.jsonl\",\n])\n"
  },
  {
    "path": "experiments/testdata/cats_and_dogs.jsonl",
    "content": "{ \"text\": \"cats good\", \"bad\": 0.0 }\n{ \"text\": \"cats bad\", \"bad\": 1.0 }\n{ \"text\": \"dogs good\", \"bad\": 0.0 }\n{ \"text\": \"dogs bad\", \"bad\": 1.0 }\n{ \"text\": \"good cats\", \"bad\": 0.0 }\n{ \"text\": \"dogs and cats\", \"bad\": 0.0 }\n{ \"text\": \"not bad dogs and cats\", \"bad\": 0.0 }\n{ \"text\": \"not bad dogs\", \"bad\": 0.0 }\n{ \"text\": \"bad dogs and cats\", \"bad\": 1.0 }\n{ \"text\": \"bad dogs and bad cats\", \"bad\": 1.0 }\n{ \"text\": \"dogs and bad cats\", \"bad\": 1.0 }\n{ \"text\": \"dogs and not bad cats\", \"bad\": 0.0 }\n{ \"text\": \"dogs and cats bad\", \"bad\": 1.0 }\n{ \"text\": \"dogs and cats good\", \"bad\": 1.0 }\n{ \"text\": \"not dogs and bad cats\", \"bad\": 1.0 }\n{ \"text\": \"not dogs and not cats\", \"bad\": 0.0 }\n"
  },
  {
    "path": "experiments/testdata/cats_and_dogs_onehot.vocab.txt",
    "content": "dogs 1.0 0.0 0.0 0.0 0.0 0.0\ncats 0.0 1.0 0.0 0.0 0.0 0.0\ngood 0.0 0.0 1.0 0.0 0.0 0.0\nbad 0.0 0.0 0.0 1.0 0.0 0.0\nand 0.0 0.0 0.0 0.0 1.0 0.0\nnot 0.0 0.0 0.0 0.0 0.0 1.0\n"
  },
  {
    "path": "experiments/testdata/cats_and_dogs_with_cat_opt_int_labels.jsonl",
    "content": "{ \"text\": \"cats good\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"cats bad\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"dogs good\", \"bad\": 0.0 }\n{ \"text\": \"dogs bad\", \"bad\": 1.0 }\n{ \"text\": \"good cats\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"dogs and cats\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"not bad dogs and cats\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"not bad dogs\", \"bad\": 0.0 }\n{ \"text\": \"bad dogs and cats\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"bad dogs and bad cats\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"dogs and bad cats\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"dogs and not bad cats\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"dogs and cats bad\", \"bad\": 1.0, \"cat\": 1  }\n{ \"text\": \"dogs and cats good\", \"bad\": 1.0, \"cat\": 1  }\n{ \"text\": \"not dogs and bad cats\", \"bad\": 1.0, \"cat\": 1  }\n{ \"text\": \"not dogs and not cats\", \"bad\": 0.0, \"cat\": 1 }\n"
  },
  {
    "path": "experiments/testdata/cats_and_dogs_with_partial_cat_int_labels.jsonl",
    "content": "{ \"text\": \"cats good\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"cats bad\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"dogs good\", \"bad\": 0.0, \"cat\": 0  }\n{ \"text\": \"dogs bad\", \"bad\": 1.0, \"cat\": 0  }\n{ \"text\": \"good cats\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"dogs and cats\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"not bad dogs and cats\", \"bad\": 0.0, \"cat\": 1 }\n{ \"text\": \"not bad dogs\", \"bad\": 0.0, \"cat\": 0 }\n{ \"text\": \"bad dogs and cats\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"bad dogs and bad cats\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"dogs and bad cats\", \"bad\": 1.0, \"cat\": 1 }\n{ \"text\": \"dogs and not bad cats\", \"bad\": 0.0}\n{ \"text\": \"dogs and cats bad\", \"bad\": 1.0 }\n{ \"text\": \"dogs and cats good\", \"bad\": 1.0 }\n{ \"text\": \"not dogs and bad cats\", \"bad\": 1.0 }\n{ \"text\": \"not dogs and not cats\", \"bad\": 0.0 }\n"
  },
  {
    "path": "experiments/tf_trainer/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/common/BUILD",
    "content": "py_library(\n    name = \"types\",\n    srcs = [\n        \"types.py\",\n    ],\n)\n\npy_library(\n    name = \"model_trainer\",\n    srcs = [\n        \"model_trainer.py\",\n    ],\n    deps = [\n        \":base_model\",\n        \":data_input\",\n        \":text_preprocessor\",\n        \":types\",\n    ],\n)\n\npy_library(\n    name = \"token_embedding_index\",\n    srcs = [\n        \"token_embedding_index.py\",\n    ],\n    deps = [\n        \":base_model\",\n        \":types\",\n    ],\n)\n\npy_test(\n    name = \"token_embedding_index_test\",\n    srcs = [\"token_embedding_index_test.py\"],\n    data = [\"//testdata:cats_and_dogs_onehot.vocab.txt\"],\n    deps = [\n        \":token_embedding_index\",\n        \":types\",\n    ],\n)\n\npy_library(\n    name = \"text_preprocessor\",\n    srcs = [\n        \"text_preprocessor.py\",\n    ],\n    deps = [\n        \":base_model\",\n        \":token_embedding_index\",\n        \":types\",\n    ],\n)\n\npy_test(\n    name = \"text_preprocessor_test\",\n    srcs = [\"text_preprocessor_test.py\"],\n    data = [\n        \"//testdata:cats_and_dogs_onehot.vocab.txt\",\n    ],\n    deps = [\n        \":text_preprocessor\",\n        \":types\",\n    ],\n)\n\npy_library(\n    name = \"base_model\",\n    srcs = [\n        \"base_model.py\",\n    ],\n    deps = [\":types\"],\n)\n\npy_library(\n    name = \"data_input\",\n    srcs = [\n        \"dataset_input.py\",\n        \"tfrecord_input.py\",\n        \":base_model\",\n    ],\n    deps = [\":types\"],\n)\n\npy_test(\n    name = \"tfrecord_input_test\",\n    srcs = [\"tfrecord_input_test.py\"],\n    deps = [\n        \":data_input\",\n        \":types\",\n    ],\n)\n\npy_library(\n    name = \"cnn_spec_parser\",\n    srcs = [\"cnn_spec_parser.py\"],\n    deps = [\":types\"],\n)\n\npy_test(\n    name = \"cnn_spec_parser_test\",\n    srcs = [\"cnn_spec_parser_test.py\"],\n    deps = [\n        \":cnn_spec_parser\",\n        \":types\",\n    
],\n)\n\npy_library(\n    name = \"episodic_tfrecord_input\",\n    srcs = [\"episodic_tfrecord_input.py\"],\n    deps = [\n        \":types\",\n        \":base_model\",\n        \":data_input\",\n    ],\n)\n"
  },
  {
    "path": "experiments/tf_trainer/common/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/common/base_model.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Interface for Models.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nimport tensorflow as tf\n\nfrom tf_trainer.common import types\nfrom typing import Callable\n\n# The TF Example key associated with input features that consist of an\n# UTF-8 string, for models that use that as input.\nTEXT_FEATURE_KEY = 'text'\n\n# The TF Example key associated with a Tensor of int32s for models that\n# use tokens from a vocabulary as input.\nTOKENS_FEATURE_KEY = 'tokens'\n\n# The TF Example key associated with examples in inference that consist of\n# an int64 integer. It is a unique identifier of the TF Example and is passed\n# along by the estimator and returned in the predictions (forward_features).\nEXAMPLE_KEY = 'comment_key'\n\n\nclass BaseModel(abc.ABC):\n  \"\"\"Tentative interface for all model classes.\n\n  Although the code doesn't take advantage of this interface yet, all models\n  should subclass this one.\n  \"\"\"\n\n  def map(self, f: Callable[[tf.estimator.Estimator], tf.estimator.Estimator]\n         ) -> 'BaseModel':\n    \"\"\"Allows models to be extended. 
e.g.\n\n    adding preprocessing steps.\n    \"\"\"\n\n    class Model(BaseModel):\n\n      def estimator(unused, model_dir):\n        del unused\n        return f(self.estimator(model_dir))\n\n      def hparams(unused):\n        del unused\n        return self.hparams()\n\n    return Model()\n\n  @abc.abstractmethod\n  def estimator(self, model_dir: str) -> tf.estimator.Estimator:\n    pass\n\n  def hparams(self) -> tf.contrib.training.HParams:\n    return tf.contrib.training.HParams()\n"
  },
  {
    "path": "experiments/tf_trainer/common/basic_gpu_config.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU"
  },
  {
    "path": "experiments/tf_trainer/common/cnn_spec_parser.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"CNN Specification Parser.\n\nA simple parser for specifications of convolutional layers.\n\nBNF defining the syntax to specify CNNs:\n```\n  layers = layer : layers\n  layer = filters\n  filters = filter, filters\n  filter = (size / stride -> num_filters)\n  size, stride, num_filters = \\d+\n```\n\nInspiration for the notation comes from: `num_filters` being the output\nembedding size, and the other dimension of the computed CNN matrix will be\n`input_size * size / stride`.\n\"\"\"\n\nimport re\nfrom typing import List\n\nlayers_split_regexp = re.compile(r'\\s*:\\s*')\nfilters_split_regexp = re.compile(r'\\s*,\\s*')\nfilter_regexp = re.compile(r'\\(\\s*(?P<size>\\d+)\\s*/\\s*(?P<stride>\\d+)\\s*'\n                           r'\\-\\>\\s*(?P<num_filters>\\d+)\\s*\\)')\n\n\nclass FilterParseError(Exception):\n  pass\n\n\nclass Filter(object):\n  \"\"\"A single CNN filter.\n\n  filter = '(size / stride -> num_filters)'\n  \"\"\"\n\n  def __init__(self, str: str) -> None:\n    m = filter_regexp.match(str)\n    if m is None:\n      raise FilterParseError('Bad filter definition for: %s' % str)\n    self.num_filters = int(m.group('num_filters'))  # type \"int\"\n    self.size = int(m.group('size'))  # type \"int\"\n    self.stride = int(m.group('stride'))  # type \"int\"\n\n  def __str__(self) -> str:\n    return ('(%d / %d -> 
%d)' % (self.size, self.stride, self.num_filters))\n\n\nclass ConcurrentFilters(object):\n  \"\"\"A set of concurrent CNN filters that make up one layer\n\n  filters = filter, filters\n  \"\"\"\n\n  def __init__(self, str: str) -> None:\n    filter_spec_strs = filters_split_regexp.split(str)\n    self.filters = [Filter(s) for s in filter_spec_strs]\n\n  def __str__(self) -> str:\n    return ', '.join([str(f) for f in self.filters])\n\n\nclass SequentialLayers(object):\n  \"\"\"A sequence of CNN layers\n\n  layers = filters : layers\n  \"\"\"\n\n  def __init__(self, str: str) -> None:\n    layer_spec_strs = layers_split_regexp.split(str)\n    self.layers = [ConcurrentFilters(s) for s in layer_spec_strs\n                  ]  # type: List[ConcurrentFilters]\n\n  def __str__(self) -> str:\n    return ' : '.join([str(f) for f in self.layers])\n"
  },
  {
    "path": "experiments/tf_trainer/common/cnn_spec_parser_test.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Tests for tfrecord_input.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom tf_trainer.common.cnn_spec_parser import SequentialLayers\nfrom tf_trainer.common.cnn_spec_parser import ConcurrentFilters\nfrom tf_trainer.common.cnn_spec_parser import Filter\n\n\nclass CnnSpecParserTest(tf.test.TestCase):\n\n  def test_SequentialLayers(self):\n    s = ('(2 / 2 -> 100), (3 / 2 -> 101) '\n         ': (6 / 2 -> 102) '\n         ': (3 / 1 -> 103)')\n    spec = SequentialLayers(s)\n    layer0 = spec.layers[0]\n    self.assertEqual(len(layer0.filters), 2)\n    layer0filter0 = layer0.filters[0]  # type: Filter\n    self.assertEqual(layer0filter0.size, 2)\n    self.assertEqual(layer0filter0.stride, 2)\n    self.assertEqual(layer0filter0.num_filters, 100)\n    self.assertEqual(str(spec), s)\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "experiments/tf_trainer/common/dataset_config.sh",
    "content": "#!/bin/bash\n\nBASE_PATH=\"gs://conversationai-models\"\nGCS_RESOURCES=\"${BASE_PATH}/resources\"\nMODEL_PARENT_DIR=\"${BASE_PATH}/tf_trainer_runs\"\n\nif [ \"$1\" == \"civil_comments\" ]; then\n    train_path=\"${GCS_RESOURCES}/civil_comments_data/train_eval_test/train-*.tfrecord\"\n    valid_path=\"${GCS_RESOURCES}/civil_comments_data/train_eval_test/eval-*.tfrecord\"\n    labels=\"toxicity\"\n    label_dtypes=\"float\"\n    text_feature=\"comment_text\"\n\nelif [ \"$1\" == \"toxicity\" ]; then\n    train_path=\"${GCS_RESOURCES}/toxicity_data/toxicity_q42017_train.tfrecord\"\n    valid_path=\"${GCS_RESOURCES}/toxicity_data/toxicity_q42017_validate.tfrecord\"\n    labels=\"frac_neg\"\n    label_dtypes=\"float\"\n    text_feature=\"comment_text\"\n\nelif [ \"$1\" == \"many_communities\" ]; then\n    train_path=\"${GCS_RESOURCES}/transfer_learning_data/many_communities/20181105_train.tfrecord\"\n    valid_path=\"${GCS_RESOURCES}/transfer_learning_data/many_communities/20181105_validate.tfrecord\"\n    labels=\"removed\"\n    # removed is a boolean variable cast as an int.\n    # 1 means that the comment was removed and 0 means it was not.\n    label_dtypes=\"int\"\n    text_feature=\"comment_text\"\n\nelif [ \"$1\" == \"many_communities_40_per_8_shot\" ]; then\n\n    if [ \"$2\" == \"optimistic\" ]; then\n        train_path=\"${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/augmented_train.tfrecord\"\n    elif [ \"$2\" == \"pessimistic\" ]; then\n        train_path=\"${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/original_train..tfrecord\"\n    else\n        echo \"Must provide second positional argument.\"\n        exit 1\n    fi\n\n    valid_path=\"${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord\"\n    # test_path = \"${GCS_RESOURCES}/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord\"\n    labels=\"label\"\n    # removed is a 
boolean variable cast as an int.\n    # 1 means that the comment was removed and 0 means it was not.\n    label_dtypes=\"int\"\n    text_feature=\"text\"\n\n    # used for param tuning\n    train_steps=3000\n    eval_steps=250\n    eval_period=200\n\nelse\n    echo \"First positional arg must be one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot.\"\n    exit 1\nfi\n"
  },
  {
    "path": "experiments/tf_trainer/common/dataset_input.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Abstract Base Class for DatasetInput.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport abc\nfrom tf_trainer.common import types\n\n\nclass DatasetInput(abc.ABC):\n  \"\"\"Abstract Base Class for Dataset Input.\n\n  Provides the input functions (referred to as input_fn in TF docs) to be used\n  with Tensorflow Estimator's train, evaluate, and predict methods.\n  \"\"\"\n\n  @abc.abstractmethod\n  def train_input_fn(self) -> types.EstimatorInput:\n    pass\n\n  @abc.abstractmethod\n  def validate_input_fn(self) -> types.EstimatorInput:\n    pass\n"
  },
  {
    "path": "experiments/tf_trainer/common/episodic_tfrecord_input.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"DatasetInput implementation for episodic data.\"\"\"\n\nimport tensorflow as tf\nfrom pathlib import Path\n\nimport collections\nimport os\nimport random\n\nfrom tf_trainer.common import dataset_input\nfrom tf_trainer.common import types\nfrom typing import List, Dict, Tuple, Union\n\ntf.app.flags.DEFINE_string('train_path', None,\n                           'Path to the training data TFRecord file.')\ntf.app.flags.DEFINE_string('dev_path', None,\n                           'Path to the training data TFRecord file.')\ntf.app.flags.DEFINE_string('episode_size', None,\n                           'Path to the training data TFRecord file.')\n\nText = Union[tf.Tensor, str]\nLabel = Union[tf.Tensor, float]\n\nTextDomainLabel = collections.namedtuple('TextDomainLabel',\n                                         ['text', 'domain', 'label'])\nEpisodeData = collections.namedtuple('EpisodeData',\n                                     ['texts', 'domains', 'labels'])\n\n\nclass EpisodicTFRecordInput(dataset_input.DatasetInput):\n  \"\"\"Generates episodic data.\"\"\"\n\n  def __init__(self, train_dir, validate_dir) -> None:\n    self.train_dir = train_dir\n    self.validate_dir = validate_dir\n\n  def train_input_fn(self) -> types.FeatureAndLabelTensors:\n    all_episodes = self._get_randomized_episodes(self.train_dir)\n    all_texts = 
[ep.texts for ep in all_episodes]\n    all_domains = [ep.domains for ep in all_episodes]\n    all_labels = [ep.labels for ep in all_episodes]\n    ds = tf.data.Dataset.from_tensor_slices((all_texts, all_domains,\n                                             all_labels))\n    self.episode_batches_itr = ds.make_one_shot_iterator()\n    return self.episode_batches_itr.get_next()\n\n  def validate_input_fn(self) -> types.FeatureAndLabelTensors:\n    pass\n\n  def _get_randomized_episodes(self, directory: str) -> List[EpisodeData]:\n    \"\"\"Retrieves a list of domain specific datasets.\n\n    Given a directory of TFRecord files, each holding data for a given domain,\n    with file name \"[domain].tfrecord\", returns an iterator of datasets, each\n    corresponding to the data for a single domain.\n    \"\"\"\n\n    tfrecord_files = tf.gfile.Glob(os.path.join(directory, '*.tfrecord'))\n    episodes = []\n    for file_no, tfrecord_file in enumerate(tfrecord_files):\n      tf.logging.info('PROCESSING FILE {}: {}'.format(file_no, tfrecord_file))\n      episodes.append(self._dataset_from_tfrecord_file(tfrecord_file))\n\n    tf.logging.info('Shuffling episodes')\n    random.shuffle(episodes)  # In place shuffle.\n\n    return episodes\n\n  def _dataset_from_tfrecord_file(self, tfrecord_file: str) -> EpisodeData:\n    # The domain happens to be the file stem.\n    domain = Path(tfrecord_file).stem\n\n    def _read_tf_example(record) -> TextDomainLabel:\n      parsed = tf.parse_single_example(\n          record, {\n              'text': tf.FixedLenFeature([], tf.string),\n              'label': tf.FixedLenFeature([], tf.int64)\n          })  # type: Dict[str, types.Tensor]\n\n      return TextDomainLabel(\n          text=parsed['text'], domain=domain, label=parsed['label'])\n\n    examples = list(tf.python_io.tf_record_iterator(tfrecord_file))\n    random.shuffle(examples)\n\n    datapoints = [_read_tf_example(example) for example in examples]\n    return EpisodeData(\n       
 texts=[dp.text for dp in datapoints],\n        domains=[dp.domain for dp in datapoints],\n        labels=[dp.label for dp in datapoints])\n"
  },
  {
    "path": "experiments/tf_trainer/common/episodic_tfrecord_input_test.py",
    "content": "\"\"\"Tests for episodic_tfrecord_input.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tf_trainer.common import episodic_tfrecord_input\n\n\nclass EpisodicTFRecordInputTest(tf.test.TestCase):\n\n  def test(self):\n    train_dir = 'gs://kaggle-model-experiments/resources/transfer_learning_data/many_communities_pruned_episodes'\n    tf.logging.info('CREATE')\n    e = episodic_tfrecord_input.EpisodicTFRecordInput(train_dir, 'asdf')\n    tf.logging.info('GET DATA')\n    episodic_batch = e.train_input_fn()\n    with tf.Session() as session:\n      tf.logging.info('FIRST BATCH')\n      tf.logging.info(session.run(episodic_batch))\n      tf.logging.info('SECOND BATCH')\n      print(session.run(episodic_batch))\n\n\nif __name__ == '__main__':\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.test.main()\n"
  },
  {
    "path": "experiments/tf_trainer/common/model_trainer.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"The Model Trainer class.\n\nThis provides an abstraction of Keras and TF.Estimator, and is intended for use\nin text classification models (although it may generalize to other kinds of\nproblems).\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport json\nimport os\nimport os.path\nimport six\n\nimport tensorflow as tf\nfrom tensorflow.python.platform import tf_logging as logging\nfrom tensorflow.python.estimator import estimator as estimator_lib\nfrom tensorflow.python.estimator import model_fn as model_fn_lib\nfrom tensorflow.python.estimator.export.export_output import PredictOutput\nfrom tensorflow.python.framework import ops\nfrom tensorflow.python.framework import sparse_tensor as sparse_tensor_lib\nfrom tensorflow.python.ops import clip_ops\nfrom tensorflow.python.ops import sparse_ops\nfrom tensorflow.python.training import optimizer as optimizer_lib\nfrom tensorflow.python.lib.io import file_io\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import dataset_input as ds\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string('model_dir', None,\n                           \"Directory for the Estimator's model directory.\")\ntf.app.flags.DEFINE_string('warm_start_from', None,\n                           'Existing 
checkpoint from which to start training.')\ntf.app.flags.DEFINE_bool('enable_profiling', False,\n                         'Enable profiler hook in estimator.')\ntf.app.flags.DEFINE_integer(\n    'n_export', -1, 'Number of models to export.'\n    'If =-1, only the best checkpoint (wrt specified eval metric) is exported.'\n    'If =1, only the last checkpoint is exported.'\n    'If >1, we export `n_export` evenly-spaced checkpoints.')\ntf.app.flags.DEFINE_string('key_name', 'comment_key',\n                           'Name of a pass-thru integer id for batch scoring.')\n\ntf.app.flags.DEFINE_integer('train_steps', 100000,\n                            'The number of steps to train for.')\ntf.app.flags.DEFINE_integer('eval_period', 1000,\n                            'The number of steps per eval period.')\ntf.app.flags.DEFINE_integer('eval_steps', None,\n                            'Number of examples to eval for, default all.')\n\ntf.app.flags.mark_flag_as_required('model_dir')\n\n\n# Copied from:\n# https://stackoverflow.com/questions/49846207/tensorflow-estimator-warm-start-from-and-model-dir\nclass InitHook(tf.train.SessionRunHook):\n  \"\"\"Initializes model from a checkpoint_path\n  \n    Args:\n      checkpoint_dir: full path to dir containing the checkpoint\n  \"\"\"\n  def __init__(self, checkpoint_dir):\n    self.model_path = checkpoint_dir\n    self.initialized = False\n\n  def begin(self):\n    \"\"\"\n    Restore parameters if a pre-trained model is available and\n    we haven't trained previously.\n    \"\"\"\n    if not self.initialized:\n      #checkpoint = tf.train.latest_checkpoint(self.model_path)\n      all_checkpoints = file_io.get_matching_files(os.path.join(\n        self.model_path, 'model.ckpt-*.index'))\n\n      if not all_checkpoints:\n        raise ValueError('No checkpoint files found matching %s.' 
% (\n          self.model_path + '*'))\n\n      all_checkpoints = [x.replace('.index', '') for x in all_checkpoints]\n      all_checkpoints = sorted(all_checkpoints, key=lambda x: int(x.split('-')[-1]))\n      checkpoint = all_checkpoints[-1]\n\n      if checkpoint is None:\n        logging.info('No pre-trained model is available at %s, '\n          'training from scratch.' % self.model_path)\n      else:\n        logging.info('Pre-trained model {0} found in {1} - warmstarting.'.format(\n          checkpoint, self.model_path))\n        tf.train.warm_start(checkpoint)\n      self.initialized = True\n\n\n# This function extends tf.contrib.estimator.forward_features.\n# As the binary_head has a ClassificationOutput for serving_default,\n# the check at the end of 'new_model_fn' fails in the initial fn.\ndef forward_features(estimator, keys, sparse_default_values=None):\n  \"\"\"Forward features to predictions dictionary.\n\n  In some cases, user wants to see some of the features in estimators prediction\n  output. As an example, consider a batch prediction service: The service simply\n  runs inference on the users graph and returns the results. Keys are essential\n  because there is no order guarantee on the outputs so they need to be rejoined\n  to the inputs via keys or transclusion of the inputs in the outputs.\n  Example:\n  ```python\n    def input_fn():\n      features, labels = ...\n      features['unique_example_id'] = ...\n      features, labels\n    estimator = tf.estimator.LinearClassifier(...)\n    estimator = tf.contrib.estimator.forward_features(\n        estimator, 'unique_example_id')\n    estimator.train(...)\n    assert 'unique_example_id' in estimator.predict(...)\n  ```\n  Args:\n    estimator: A `tf.estimator.Estimator` object.\n    keys: A `string`\n    sparse_default_values: A dict of `str` keys mapping the name of the sparse\n      features to be converted to dense, to the default value to use. 
Only\n      sparse features indicated in the dictionary are converted to dense and the\n      provided default value is used.\n\n  Returns:\n      A new `tf.estimator.Estimator` which forwards features to predictions.\n  Raises:\n    ValueError:\n      * if `keys` is already part of `predictions`. We don't allow\n        override.\n      * if 'keys' does not exist in `features`.\n    TypeError: if `keys` type is not one of `string` or list/tuple of `string`.\n  \"\"\"\n\n  def verify_key_types(keys):  # pylint: disable=missing-docstring\n    if keys is None:\n      return keys\n    if isinstance(keys, six.string_types):\n      return [keys]\n    if not isinstance(keys, (list, tuple)):\n      raise TypeError('keys should be either a string or a list of strings. '\n                      'Given: {}'.format(type(keys)))\n    for key in keys:\n      if not isinstance(key, six.string_types):\n        raise TypeError('All items in the given keys list should be a string. '\n                        'There exist an item with type: {}'.format(type(key)))\n    return keys\n\n  def get_keys(features):\n    if keys is None:\n      return features.keys()\n    return keys\n\n  def verify_keys_and_predictions(features, predictions):\n    if not isinstance(predictions, dict):\n      raise ValueError(\n          'Predictions should be a dict to be able to forward features. '\n          'Given: {}'.format(type(predictions)))\n    for key in get_keys(features):\n      if key not in features:\n        raise ValueError(\n            'keys should be exist in features. Key \"{}\" is not in features '\n            'dict. features dict has following keys: {}. Please check '\n            'arguments of forward_features.'.format(key, features.keys()))\n      if key in predictions:\n        raise ValueError(\n            'Cannot forward feature key ({}). Since it does exist in '\n            'predictions. Existing prediction keys: {}. 
Please check arguments '\n            'of forward_features.'.format(key, predictions.keys()))\n\n  keys = verify_key_types(keys)\n\n  def new_model_fn(features, labels, mode, config):  # pylint: disable=missing-docstring\n    spec = estimator.model_fn(features, labels, mode, config)\n    predictions = spec.predictions\n    if predictions is None:\n      return spec\n    verify_keys_and_predictions(features, predictions)\n    for key in get_keys(features):\n      feature = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(\n          features[key])\n      if sparse_default_values and (key in sparse_default_values):\n        if not isinstance(feature, sparse_tensor_lib.SparseTensor):\n          raise ValueError(\n              'Feature ({}) is expected to be a `SparseTensor`.'.format(key))\n        feature = sparse_ops.sparse_tensor_to_dense(\n            feature, default_value=sparse_default_values[key])\n      if not isinstance(feature, ops.Tensor):\n        raise ValueError(\n            'Feature ({}) should be a Tensor. 
Please use `keys` '\n            'argument of forward_features to filter unwanted features, or'\n            'add key to argument `sparse_default_values`.'\n            'Type of features[{}] is {}.'.format(key, key, type(feature)))\n      predictions[key] = feature\n    spec = spec._replace(predictions=predictions)\n    if spec.export_outputs:  # CHANGES HERE\n      outputs = spec.export_outputs['predict'].outputs\n      outputs[key] = spec.predictions[key]\n      spec.export_outputs['predict'] = tf.estimator.export.PredictOutput(\n          outputs)\n      spec.export_outputs[\n          'serving_default'] = tf.estimator.export.PredictOutput(outputs)\n    return spec\n\n  return estimator_lib.Estimator(\n      model_fn=new_model_fn,\n      model_dir=estimator.model_dir,\n      config=estimator.config)\n\n\nclass ModelTrainer(object):\n  \"\"\"Model Trainer.\"\"\"\n\n  def __init__(self, dataset: ds.DatasetInput,\n               model: base_model.BaseModel,\n               warm_start_from: str = None) -> None:\n    self._dataset = dataset\n    self._model = model\n    self._warm_start_from = warm_start_from\n    self._estimator = model.estimator(self._model_dir())\n\n  def train_with_eval(self):\n    \"\"\"Train with periodic evaluation.\n    \"\"\"\n    training_hooks = None\n    if FLAGS.enable_profiling:\n      training_hooks = [\n          tf.train.ProfilerHook(\n              save_steps=10,\n              output_dir=os.path.join(self._model_dir(), 'profiler')),\n      ]\n\n    if self._warm_start_from:\n      init_hook = InitHook(checkpoint_dir=self._warm_start_from)\n      if training_hooks:\n        training_hooks.append(init_hook)\n      else:\n        training_hooks = [init_hook]\n\n    train_spec = tf.estimator.TrainSpec(\n        input_fn=self._dataset.train_input_fn,\n        max_steps=FLAGS.train_steps,\n        hooks=training_hooks)\n\n    eval_spec = tf.estimator.EvalSpec(\n        input_fn=self._dataset.validate_input_fn,\n        
steps=FLAGS.eval_steps,\n        throttle_secs=1)\n\n    self._estimator._config = self._estimator.config.replace(\n        save_checkpoints_steps=FLAGS.eval_period)\n\n    if FLAGS.n_export > 1 or FLAGS.n_export == -1:\n      self._estimator._config = self._estimator.config.replace(\n          keep_checkpoint_max=None)\n\n    tf.estimator.train_and_evaluate(self._estimator, train_spec, eval_spec)\n\n  def predict_on_dev(self, predict_keys=None):\n    checkpoints, _ = self._get_list_checkpoint(1, self._model_dir(),\n                                                         None, None)\n    return self._estimator.predict(self._dataset.validate_input_fn,\n                                   predict_keys=predict_keys,\n                                   checkpoint_path=checkpoints[0])\n\n  def eval_dir(self):\n    return self._estimator.eval_dir()\n\n  def _model_dir(self):\n    \"\"\"Get Model Directory.\n\n    Used to scope logs to a given trial (when hyper param tuning) so that they\n    don't run over each other. When running locally it will just use the passed\n    in model_dir.\n    \"\"\"\n    return os.path.join(\n        FLAGS.model_dir,\n        json.loads(os.environ.get('TF_CONFIG', '{}')).get('task', {}).get(\n            'trial', ''))\n\n  def _add_estimator_key(self, estimator, example_key_name):\n    \"\"\"Adds a forward key to the model_fn of an estimator.\"\"\"\n    estimator = forward_features(estimator, example_key_name)\n    return estimator\n\n\n  def _get_best_step_from_event_file(self,\n    event_file,\n    metrics_key,\n    is_first_metric_better_fn):\n    \"\"\"Find, in `event_file`, the step corresponding to the best metric.\n\n    Args:\n      event_file: The event file where to find the metrics.\n      metrics_key: The metric by which to determine the best checkpoint to save.\n      is_first_metric_better_fn: Comparison function to find best metric. 
Takes\n          in as arguments two numbers, returns true if first is better than\n          second. Default function says larger is better. Default value works for\n          AUC: higher is better.\n    \n    Returns:\n      Best step (int).\n    \"\"\"\n    if not metrics_key:\n      return None\n    best_metric = None\n    best_step = None\n    for e in tf.train.summary_iterator(event_file):\n      for v in e.summary.value:\n        if v.tag == metrics_key:\n          metric = v.simple_value\n          if not best_step or is_first_metric_better_fn(metric, best_metric):\n            best_metric = metric\n            best_step = e.step\n    return best_step\n\n\n  def _get_best_checkpoint(self,\n    checkpoints,\n    metrics_key,\n    is_first_metric_better_fn):\n    \"\"\"Find the best checkpoint, according to `metrics_key`.\n\n    Args:\n      checkpoints: List of model checkpoints.\n      metrics_key: The metric by which to determine the best checkpoint to save.\n      is_first_metric_better_fn: Comparison function to find best metric. Takes\n          in as arguments two numbers, returns true if first is better than\n          second. Default function says larger is better. Default value works for\n          AUC: higher is better.\n\n    Returns:\n      Best checkpoint path.\n    \"\"\"\n    eval_event_dir = self._estimator.eval_dir()\n\n    event_files = file_io.list_directory(eval_event_dir)\n    if not event_files:\n      raise ValueError('No event files found in directory %s.' % eval_event_dir)\n    if len(event_files) > 1:\n      print('Multiple event files found in dir %s. Using last one.' 
% eval_event_dir)\n    \n    event_file = os.path.join(eval_event_dir, event_files[-1])\n\n    # Use the best step to find the best checkpoint.\n    best_step = self._get_best_step_from_event_file(event_file, metrics_key,\n      is_first_metric_better_fn)\n    \n    # If we couldn't find metrics_key in the event file, try again using loss.\n    if best_step is None:\n      print(\"Metrics key %s not found in metrics, using 'loss' as metric key.\" %\n            metrics_key)\n      metrics_key = \"loss\"\n      # Want the checkpoint with the lowest loss\n      is_first_metric_better_fn = lambda x, y: x < y\n\n      best_step = self._get_best_step_from_event_file(event_file, metrics_key,\n        is_first_metric_better_fn)\n\n    if best_step is None:\n      raise ValueError(\"Couldn't find 'loss' metric in event file %s.\" % event_file)\n\n    best_checkpoint_path = None\n    for checkpoint_path in checkpoints:\n      version = int(checkpoint_path.split('-')[-1])\n      if version == best_step:\n        best_checkpoint_path = checkpoint_path\n\n    if not best_checkpoint_path:\n      raise ValueError(\"Couldn't find checkpoint for best_step = %d.\" % best_step)\n\n    return best_checkpoint_path\n\n\n  def _get_list_checkpoint(self,\n    n_export,\n    model_dir,\n    metrics_key,\n    is_first_metric_better_fn):\n    \"\"\"Get the checkpoints that we want to export, as well as the ones to clean up.\n\n    Args:\n      n_export: Number of models to export.\n      model_dir: Directory containing the checkpoints.\n      metrics_key: The metric by which to determine the best checkpoint to save.\n      is_first_metric_better_fn: Comparison function to find best metric. Takes\n          in as arguments two numbers, returns true if first is better than\n          second. Default function says larger is better. 
Default value works for\n          AUC: higher is better.\n\n    Returns:\n      Tuple of:\n        List of checkpoint paths to export,\n        Set of checkpoint paths to delete.\n\n    If n_export==1, we take only the last checkpoint.\n    If n_export==-1, we take the best checkpoint, according to `metrics_key` and\n      `is_first_metric_better_fn`. The remaining checkpoints are deleted.\n    Otherwise, we consider the list of steps for each for which we have a\n    checkpoint. Then we choose n_export number of checkpoints such that their\n    steps are as equidistant as possible.\n    \"\"\"\n    all_checkpoints = file_io.get_matching_files(\n        os.path.join(model_dir, 'model.ckpt-*.index'))\n\n    if not all_checkpoints:\n      raise ValueError('No checkpoint files found matching model.ckpt-*.index.')\n\n    all_checkpoints = [x.replace('.index', '') for x in all_checkpoints]\n    all_checkpoints = sorted(all_checkpoints, key=lambda x: int(x.split('-')[-1]))\n\n    # Keep track of the checkpoints to export, and the ones to delete.\n    checkpoints_to_export = None\n    checkpoints_to_delete = None\n\n    if n_export == 1:\n      checkpoints_to_export = [all_checkpoints[-1]]\n    elif n_export == -1:\n      checkpoints_to_export = [self._get_best_checkpoint(all_checkpoints, metrics_key,\n                                                         is_first_metric_better_fn)]\n    elif n_export > 1:\n      # We want to cover a distance of (len(checkpoints) - 1): for 3 points, we have a distance of 2.\n      # with a number of points of (n_export -1): because 1 point is set at the end.\n      step = float(len(all_checkpoints) - 1) / (n_export - 1)\n      if step <= 1:  # Fewer checkpoints available than the desired number.\n        return all_checkpoints, None\n\n      checkpoints_to_export = [\n          all_checkpoints[int(i * step)] for i in range(n_export - 1)\n      ]\n      checkpoints_to_export.append(all_checkpoints[-1])\n\n    if 
checkpoints_to_export:\n      checkpoints_to_delete = set(all_checkpoints) - set(checkpoints_to_export)\n\n    return checkpoints_to_export, checkpoints_to_delete\n\n\n  def export(self,\n    serving_input_fn,\n    example_key_name=None,\n    metrics_key=None,\n    is_first_metric_better_fn=lambda x, y: x > y,\n    delete_unexported_checkpoints=True):\n    \"\"\"Export model as a .pb.\n\n    Args:\n      serving_input_fn: An input function for inference graph.\n      example_key_name: Name of the example_key field (string).\n          If None, no example_key will be used.\n      metrics_key: The metric by which to determine the best checkpoint to save.\n      is_first_metric_better_fn: Comparison function to find best metric. Takes\n          in as arguments two numbers, returns true if first is better than\n          second. Default function says larger is better. Default value works for\n          AUC: higher is better.\n      delete_unexported_checkpoints: Boolean flag indicating whether or not to delete\n        the checkpoints that aren't exported. If False then all model checkpoints are\n        retained.\n\n      NOTE: if using a different metrics_key than AUC, make sure `is_first_metric_better_fn`\n        is updated accordingly.\n\n    Example keys are useful when doing batch predictions. Typically,\n      the predictions are done by a cluster of machines and the order of\n      the results is random. Here, we add a forward feature in the inference graph\n      (https://www.tensorflow.org/api_docs/python/tf/contrib/estimator/forward_features)\n      which will be used as an example unique identifier. 
In inference, the input\n      example includes an example_key field that is passed along by the estimator\n      and returned in the predictions.\n    \"\"\"\n    if FLAGS.n_export == -1:\n      if not is_first_metric_better_fn:\n        raise ValueError('Must provide valid `is_first_metric_better_fn` '\n          'when exporting best checkpoint.')\n      if not metrics_key:\n        print('No value provided for `metrics_key`. Using loss.')\n        metrics_key = 'loss'\n        is_first_metric_better_fn = lambda x, y: x < y\n\n    estimator = self._estimator\n    if example_key_name:\n      estimator = self._add_estimator_key(self._estimator, example_key_name)\n\n    checkpoints_to_export, checkpoints_to_delete = self._get_list_checkpoint(\n      FLAGS.n_export, self._model_dir(), metrics_key, is_first_metric_better_fn)\n\n    # Delete the checkpoints we don't want.\n    if checkpoints_to_delete and delete_unexported_checkpoints:\n      for ckpt in checkpoints_to_delete:\n        tf.train.remove_checkpoint(ckpt)\n\n    # Export the desired checkpoints.\n    if checkpoints_to_export:\n      for checkpoint_path in checkpoints_to_export:\n        version = checkpoint_path.split('-')[-1]\n        estimator.export_savedmodel(\n          export_dir_base=os.path.join(self._model_dir(), version),\n          serving_input_receiver_fn=serving_input_fn,\n          checkpoint_path=checkpoint_path)\n"
  },
  {
    "path": "experiments/tf_trainer/common/p100_config.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: CUSTOM\n  masterType: standard_p100\n  workerType: standard_p100\n  parameterServerType: large_model\n  workerCount: 1\n  parameterServerCount: 1"
  },
  {
    "path": "experiments/tf_trainer/common/serving_input.py",
    "content": "\"\"\"Serving functions for deployed model.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tensorflow.python.ops import array_ops\n\nFLAGS = tf.app.flags.FLAGS\n\ndef create_text_serving_input_fn(text_feature_name, example_key_name):\n\n  def serving_input_fn_tfrecords():\n    serialized_example = tf.placeholder(\n        shape=[None], dtype=tf.string, name=\"input_example_tensor\")\n    feature_spec = {\n        text_feature_name:\n            tf.FixedLenFeature([], dtype=tf.string),\n        example_key_name:\n            tf.FixedLenFeature([], dtype=tf.int64, default_value=-1)\n    }\n\n    features = tf.parse_example(serialized_example, feature_spec)\n\n    return tf.estimator.export.ServingInputReceiver(features,\n                                                    serialized_example)\n\n  return serving_input_fn_tfrecords\n\n\ndef create_serving_input_fn(word_to_idx,\n                            unknown_token,\n                            text_feature_name,\n                            example_key_name):\n\n  def serving_input_fn_tfrecords():\n\n    serialized_example = tf.placeholder(\n        shape=[None], dtype=tf.string, name=\"input_example_tensor\")\n    feature_spec = {\n        text_feature_name: tf.VarLenFeature(dtype=tf.string),\n        example_key_name: tf.FixedLenFeature([], dtype=tf.int64, default_value=-1)\n    }\n\n    features = tf.parse_example(serialized_example, feature_spec)\n\n    keys = list(word_to_idx.keys())\n    values = list(word_to_idx.values())\n    vocabulary_table = tf.contrib.lookup.HashTable(\n        tf.contrib.lookup.KeyValueTensorInitializer(\n            keys, values, key_dtype=tf.string, value_dtype=tf.int64),\n        unknown_token)\n    words_int_sparse = vocabulary_table.lookup(features[text_feature_name])\n    words_int_dense = tf.sparse_tensor_to_dense(\n        words_int_sparse, 
default_value=0)\n    features[text_feature_name] = words_int_dense\n\n    return tf.estimator.export.ServingInputReceiver(features,\n                                                    serialized_example)\n\n  return serving_input_fn_tfrecords\n"
  },
  {
    "path": "experiments/tf_trainer/common/text_preprocessor.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Text Preprocessor.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport functools\n\nfrom absl import flags\nimport numpy as np\nimport tensorflow as tf\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import types\nfrom tf_trainer.common.token_embedding_index import LoadTokenIdxEmbeddings\nfrom typing import Callable, Dict, List, Optional, Tuple\n\nFLAGS = flags.FLAGS\n\ntf.app.flags.DEFINE_bool('is_embedding_trainable', False,\n                         'Enable fine tuning of embeddings.')\n\n\nclass TextPreprocessor(object):\n  \"\"\"Text Preprocessor TensorFlow Estimator Extension.\n\n  Uses embedding indexes to create tensors that map tokens (provided by an\n  abstract tokenizer funtion) to embeddings.\n\n  Note: Due to the lack of text preprocessing functions in tensorflow, we expect\n  that the text is already preprocessed (list of words) in inference. 
In\n  training, due to the availability of tf.py_func, we can handle the\n  preprocessing.\n  \"\"\"\n\n  def __init__(self, embeddings_path: str) -> None:\n    self._word_to_idx, self._embeddings_matrix, self._unknown_token, self._embedding_size = \\\n      LoadTokenIdxEmbeddings(embeddings_path)  # type: Tuple[Dict[str, int], np.ndarray, int, int]\n\n  def train_preprocess_fn(self,\n                          tokenizer: Callable[[str], List[str]],\n                          lowercase: Optional[bool] = True\n                         ) -> Callable[[types.Tensor], types.Tensor]:\n\n    def _tokenize(text: bytes) -> np.ndarray:\n      \"\"\"Converts text to a list of words.\n\n      Args:\n        text: text to tokenize (string).\n        lowercase: whether to include lowercasing in preprocessing (boolean).\n        tokenizer: Python function to tokenize the text on.\n\n      Returns:\n        A list of strings (words).\n      \"\"\"\n\n      words = tokenizer(text.decode('utf-8'))\n      if lowercase:\n        words = [w.lower() for w in words]\n      return np.asarray(\n          [self._word_to_idx.get(w, self._unknown_token) for w in words],\n          dtype=np.int64)\n\n    def _preprocess_fn(text: types.Tensor) -> types.Tensor:\n      \"\"\"Converts a text into a list of integers.\n\n      Args:\n        text: a 0-D string Tensor.\n\n      Returns:\n        A 1-D int64 Tensor.\n      \"\"\"\n      words = tf.py_func(\n          _tokenize, [text], tf.int64, stateful=False, name='PreprocessFn')\n      return words\n\n    return _preprocess_fn\n\n  def add_embedding_to_model(self, model: base_model.BaseModel,\n                             text_feature_name: str) -> base_model.BaseModel:\n    \"\"\"Returns a new BaseModel with an embedding layer prepended.\n\n    Args:\n      model: An existing BaseModel instance.\n      text_feature_name: The name of the feature containing text.\n    \"\"\"\n    return model.map(\n        
functools.partial(self.create_estimator_with_embedding,\n                          text_feature_name))\n\n  def create_estimator_with_embedding(\n      self, text_feature_name: str,\n      estimator: tf.estimator.Estimator) -> tf.estimator.Estimator:\n    \"\"\"Takes an existing estimator and prepends the embedding layers to it.\n\n    Args:\n      estimator: A predefined Estimator that expects embeddings.\n      text_feature_name: The name of the feature containing the text.\n\n    Returns:\n      TF Estimator with embedding ops added.\n\n    Note: We need to consider the case of large embeddings (see:\n      https://stackoverflow.com/questions/48217599/\n      how-to-initialize-embeddings-layer-within-estimator-api/48243086#48243086).\n    \"\"\"\n    old_model_fn = estimator.model_fn\n    old_config = estimator.config\n    old_params = estimator.params\n\n    def add_init_fn_to_estimatorSpec(estimator_spec, init_fn):\n      \"\"\"Add a new init_fn to the scaffold part of estimator spec.\"\"\"\n\n      def new_init_fn(scaffold, sess):\n        init_fn(scaffold, sess)\n        if estimator_spec.scaffold.init_fn:\n          estimator_spec.scaffold.init_fn(scaffold, sess)\n\n      scaffold = tf.train.Scaffold(\n          init_fn=new_init_fn, copy_from_scaffold=estimator_spec.scaffold)\n      estimator_spec_with_scaffold = tf.estimator.EstimatorSpec(\n          mode=estimator_spec.mode,\n          predictions=estimator_spec.predictions,\n          loss=estimator_spec.loss,\n          train_op=estimator_spec.train_op,\n          eval_metric_ops=estimator_spec.eval_metric_ops,\n          export_outputs=estimator_spec.export_outputs,\n          training_chief_hooks=estimator_spec.training_chief_hooks,\n          training_hooks=estimator_spec.training_hooks,\n          scaffold=scaffold,\n          evaluation_hooks=estimator_spec.evaluation_hooks,\n          prediction_hooks=estimator_spec.prediction_hooks)\n      return estimator_spec_with_scaffold\n\n    def 
new_model_fn(features, labels, mode, params, config):\n      \"\"\"model_fn used in defining the new TF Estimator\"\"\"\n\n      embeddings, embedding_init_fn = self.word_embeddings(\n          trainable=FLAGS.is_embedding_trainable)\n\n      text_feature = features[text_feature_name]\n      word_embeddings = tf.nn.embedding_lookup(embeddings, text_feature)\n      new_features = {text_feature_name: word_embeddings}\n\n      # Fix dimensions to make Keras model output match label dims.\n      if mode != tf.estimator.ModeKeys.PREDICT:\n        labels = {k: tf.expand_dims(v, -1) for k, v in labels.items()}\n\n      # TODO: Modify when embeddings are part of the model.\n      estimator_spec = old_model_fn(\n          new_features, labels, mode=mode, config=config)\n      estimator_spec_with_scaffold = add_init_fn_to_estimatorSpec(\n          estimator_spec, embedding_init_fn)\n\n      return estimator_spec_with_scaffold\n\n    return tf.estimator.Estimator(\n        new_model_fn, config=old_config, params=old_params)\n\n  def word_to_idx(self) -> Dict[str, int]:\n    return self._word_to_idx\n\n  def unknown_token(self) -> int:\n    return self._unknown_token\n\n  def word_embeddings(self, trainable) -> tf.Variable:\n    \"\"\"Get word embedding TF Variable.\"\"\"\n\n    embeddings = tf.get_variable(\n        'embeddings', self._embeddings_matrix.shape, trainable=trainable)\n\n    def init_fn(scaffold, sess):\n      sess.run(embeddings.initializer,\n               {embeddings.initial_value: self._embeddings_matrix})\n\n    return embeddings, init_fn\n"
  },
  {
    "path": "experiments/tf_trainer/common/text_preprocessor_test.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Tests for text_preprocessor.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tf_trainer.common import text_preprocessor\n\n\nclass TextPreprocessorTest(tf.test.TestCase):\n\n  def test_Tokenize(self):\n    preprocessor = text_preprocessor.TextPreprocessor(\n        'testdata/cats_and_dogs_onehot.vocab.txt')\n    with self.test_session() as session:\n      preprocess_fn = preprocessor.train_preprocess_fn(\n          tokenizer=lambda x: x.split(' '), lowercase=False)\n      tokens = preprocess_fn('dogs good cats bad rabbits not')\n      self.assertEqual(list(tokens.eval()), [1, 3, 2, 4, 7, 6])\n\n  def test_Lowercase(self):\n    preprocessor = text_preprocessor.TextPreprocessor(\n        'testdata/cats_and_dogs_onehot.vocab.txt')\n    with self.test_session() as session:\n      preprocess_fn = preprocessor.train_preprocess_fn(\n          tokenizer=lambda x: x.split(' '), lowercase=True)\n      tokens = preprocess_fn('Dogs GOOD Cats BAD rabbits not')\n      self.assertEqual(list(tokens.eval()), [1, 3, 2, 4, 7, 6])\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "experiments/tf_trainer/common/tfrecord_input.py",
    "content": "\"\"\"DatasetInput class based on TFRecord files.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport multiprocessing\nimport tensorflow as tf\nfrom typing import Callable, List, Dict, Tuple\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import dataset_input\nfrom tf_trainer.common import types\n\ntf.app.flags.DEFINE_string('train_path', None,\n                           'Path to the training data TFRecord file.')\ntf.app.flags.DEFINE_string('validate_path', None,\n                           'Path to the validation data TFRecord file.')\ntf.app.flags.DEFINE_string('labels', 'frac_neg',\n                           'Comma separated list of label features.')\ntf.app.flags.DEFINE_string(\n    'label_dtypes', None, 'Comma separated list of dtypes for labels. Each '\n    'dtype must be float or int. If not provided '\n    'assumes all labels are floats.')\ntf.app.flags.DEFINE_string('text_feature', 'comment_text',\n                           'Name of feature containing text input.')\ntf.app.flags.DEFINE_boolean('round_labels', True,\n                            'Round label features to 0 or 1 if true.')\ntf.app.flags.DEFINE_integer('batch_size', 256,\n                            'Batch sizes to use when reading.')\ntf.app.flags.DEFINE_integer(\n  'num_prefetch', 5,\n  'An optimization parameter for the number of elements to prefetch. 
See: '\n  'https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch')\n\nFLAGS = tf.app.flags.FLAGS\n\nDTYPE_MAPPING = {'float': tf.float32, 'int': tf.int64}\n\nDTYPE_DEFAULT = {'float': -1.0, 'int': -1}\n\n\nclass TFRecordInput(dataset_input.DatasetInput):\n  \"\"\"Simple no-preprocessing TFRecord based DatasetInput.\n\n  Handles parsing of TF Examples.\n\n  Regardless of which TF Example feature key is used, as specified by the\n  FLAGS.text_feature, the simple input will store the input text feature in\n  the feature key _text_feature.\n  \"\"\"\n\n  def __init__(self) -> None:\n    self._labels = FLAGS.labels.split(',')\n    if FLAGS.label_dtypes:\n      self._label_dtypes = FLAGS.label_dtypes.split(',')\n    else:\n      self._label_dtypes = ['float'] * len(self._labels)\n    self._batch_size = FLAGS.batch_size\n    self._num_prefetch = FLAGS.num_prefetch\n    self._text_feature = FLAGS.text_feature\n    self._round_labels = FLAGS.round_labels\n\n  def labels(self) -> List[str]:\n    \"\"\"List of the names of the float label features.\"\"\"\n    return self._labels\n\n  def text_feature(self) -> str:\n    \"\"\"Name of the feature containing the input text from examples.\"\"\"\n    return self._text_feature\n\n  def train_input_fn(self) -> tf.data.TFRecordDataset:\n    \"\"\"input_fn for TF Estimators for training set.\n\n    Automatically repeats over input data forever. 
We define epoch limits in the\n    model trainer.\n    \"\"\"\n    assert FLAGS.train_path\n    return self._input_fn_from_file(FLAGS.train_path).repeat()\n\n  def validate_input_fn(self) -> tf.data.TFRecordDataset:\n    \"\"\"input_fn for TF Estimators for validation set.\"\"\"\n    assert FLAGS.validate_path\n    return self._input_fn_from_file(FLAGS.validate_path)\n\n  def _keys_to_features(self):\n    keys_to_features = {}\n    keys_to_features[self._text_feature] = tf.FixedLenFeature([], tf.string)\n    for label, dtype in zip(self._labels, self._label_dtypes):\n      keys_to_features[label] = tf.FixedLenFeature([], DTYPE_MAPPING[dtype],\n                                                   DTYPE_DEFAULT[dtype])\n    return keys_to_features\n\n  def _input_fn_from_file(self, filepath: str) -> tf.data.TFRecordDataset:\n    filenames_dataset = tf.data.Dataset.list_files(filepath)\n    dataset = tf.data.TFRecordDataset(\n        filenames_dataset)  # type: tf.data.TFRecordDataset\n    parsed_dataset = dataset.map(\n        self._read_tf_example, num_parallel_calls=multiprocessing.cpu_count())\n    return parsed_dataset.batch(self._batch_size).prefetch(self._num_prefetch)\n\n  def _process_labels(self, features, parsed):\n    \"\"\"Applies rounding and computes weights tied to feature presence.\n\n    For all of the expected labels, if the value is negative, this\n    indicates a missing feature from the input. A corresponding\n    label name, suffixed by '_weight' will be added to the features\n    with a value of 1.0 if present, and 0.0 if absent. 
The label\n    value is rounded up or down (if enabled) and then mapped to\n    zero if missing.\n\n    Args:\n        features: the input features read from a TF Example.\n        parsed: the input labels read from a TF Example.\n\n    Returns:\n        A tuple of the features dict (with weights) and the labels dict.\n    \"\"\"\n    # Make a shallow copy to avoid changing the input.\n    new_features = {k: v for k, v in features.items()}\n    labels = {}\n    for label in self._labels:\n      label_value = tf.cast(parsed[label], dtype=tf.float32)\n      # Missing values are negative, find them and zero those features out.\n      weight = tf.cast(tf.greater_equal(label_value, 0.0), dtype=tf.float32)\n      if self._round_labels:\n        label_value = tf.round(label_value)\n      new_features[label + '_weight'] = weight\n      labels[label] = tf.multiply(label_value, weight)\n    return new_features, labels\n\n  def _read_tf_example(\n      self,\n      record: tf.Tensor,\n  ) -> types.FeatureAndLabelTensors:\n    \"\"\"Parses TF Example protobuf into a text feature and labels.\n\n    The input TF Example has a text feature as a singleton list with the full\n    comment as the single element.\n    \"\"\"\n    parsed = tf.parse_single_example(\n        record, self._keys_to_features())  # type: Dict[str, types.Tensor]\n\n    features = {base_model.TEXT_FEATURE_KEY: parsed[self._text_feature]}\n    return self._process_labels(features, parsed)\n\n\nclass TFRecordInputWithTokenizer(TFRecordInput):\n  \"\"\"TFRecord based DatasetInput.\n\n  Handles parsing of TF Examples.\n\n  When handling text input, this class will rewrite the text input feature,\n  using the preprocessing fn. 
That is, the text feature will be rewritten\n  as a new key in the output changing both the type and contents - from\n  a string to a tensor of integers representing tokens of some kind.\n  TODO: preserve the original string and write a new key.\n  \"\"\"\n\n  def __init__(self,\n               train_preprocess_fn: Callable[[str], List[str]],\n               max_seq_len: int = 30000) -> None:\n    super().__init__()\n    self._train_preprocess_fn = train_preprocess_fn\n    self._max_seq_len = max_seq_len\n\n  def _input_fn_from_file(self, filepath: str) -> types.FeatureAndLabelTensors:\n\n    filenames_dataset = tf.data.Dataset.list_files(filepath)\n    dataset = tf.data.TFRecordDataset(\n        filenames_dataset)  # type: tf.data.TFRecordDataset\n\n    parsed_dataset = dataset.map(\n        self._read_tf_example, num_parallel_calls=multiprocessing.cpu_count())\n    parsed_dataset = parsed_dataset.filter(lambda x, _: tf.less(\n        x['sequence_length'], self._max_seq_len))\n\n    feature_shapes = {\n        base_model.TOKENS_FEATURE_KEY: [None],\n        'sequence_length': []\n    }\n    for label in self._labels:\n      feature_shapes[label + '_weight'] = []\n\n    padded_shapes = (\n      feature_shapes,\n      {label: [] for label in self._labels})  # type: Tuple[Dict, Dict]\n    parsed_dataset = parsed_dataset.apply(\n        tf.contrib.data.bucket_by_sequence_length(\n            element_length_func=lambda x, _: x['sequence_length'],\n            bucket_boundaries=[(i + 1) * 20 for i in range(10)],\n            bucket_batch_sizes=[self._batch_size] * 11,\n            padded_shapes=padded_shapes))\n    batched_dataset = parsed_dataset.prefetch(self._num_prefetch)\n    return batched_dataset\n\n  def _read_tf_example(\n      self,\n      record: tf.Tensor,\n  ) -> types.FeatureAndLabelTensors:\n    \"\"\"Parses TF Example protobuf into a text feature and labels.\n\n    The input TF Example has a text feature as a singleton list with the full\n    comment 
as the single element.\n    \"\"\"\n    parsed = tf.parse_single_example(\n        record, self._keys_to_features())  # type: Dict[str, types.Tensor]\n\n    text = parsed[self.text_feature()]\n    tokens = self._train_preprocess_fn(text)\n    features = {\n        base_model.TOKENS_FEATURE_KEY: tokens,\n        'sequence_length': tf.shape(tokens)[0],\n    }\n    return self._process_labels(features, parsed)\n"
  },
  {
    "path": "experiments/tf_trainer/common/tfrecord_input_test.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Tests for tfrecord_input.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.common import types\n\nFLAGS = tf.app.flags.FLAGS\n\n\nclass TFRecordInputTest(tf.test.TestCase):\n\n  def setUp(self):\n    FLAGS.text_feature = 'comment'\n    ex = tf.train.Example(\n        features=tf.train.Features(\n            feature={\n                'label':\n                    tf.train.Feature(\n                        float_list=tf.train.FloatList(value=[0.8])),\n                'ignored-label':\n                    tf.train.Feature(\n                        float_list=tf.train.FloatList(value=[0.125])),\n                'int_label':\n                    tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),\n                'comment':\n                    tf.train.Feature(\n                        bytes_list=tf.train.BytesList(\n                            value=['Hi there Bob'.encode('utf-8')]))\n            }))\n    self.ex_tensor = tf.convert_to_tensor(\n        ex.SerializeToString(), dtype=tf.string)\n\n  def test_TFRecordInput_unrounded(self):\n    FLAGS.round_labels = False\n    FLAGS.labels = 
'label'\n    dataset_input = tfrecord_input.TFRecordInput()\n\n    with self.test_session():\n      features, labels = dataset_input._read_tf_example(self.ex_tensor)\n      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),\n                       b'Hi there Bob')\n      np.testing.assert_almost_equal(labels['label'].eval(), 0.8)\n      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)\n      self.assertCountEqual(list(labels), ['label'])\n      self.assertCountEqual(list(features), ['text', 'label_weight'])\n\n  def test_TFRecordInput_default_values(self):\n    FLAGS.labels = 'label,fake_label,int_label'\n    FLAGS.label_dtypes = 'float,float,int'\n    FLAGS.round_labels = False\n    dataset_input = tfrecord_input.TFRecordInput()\n\n    with self.test_session():\n      features, labels = dataset_input._read_tf_example(self.ex_tensor)\n      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),\n                       b'Hi there Bob')\n      np.testing.assert_almost_equal(labels['label'].eval(), 0.8)\n      np.testing.assert_almost_equal(labels['int_label'].eval(), 0.0)\n      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)\n      np.testing.assert_almost_equal(labels['fake_label'].eval(), 0.0)\n      np.testing.assert_almost_equal(features['fake_label_weight'].eval(), 0.0)\n\n  def test_TFRecordInput_rounded(self):\n    FLAGS.labels = 'label'\n    FLAGS.round_labels = True\n    dataset_input = tfrecord_input.TFRecordInput()\n\n    with self.test_session():\n      features, labels = dataset_input._read_tf_example(self.ex_tensor)\n      self.assertEqual(features[base_model.TEXT_FEATURE_KEY].eval(),\n                       b'Hi there Bob')\n      np.testing.assert_almost_equal(labels['label'].eval(), 1.0)\n      np.testing.assert_almost_equal(features['label_weight'].eval(), 1.0)\n\n\nclass TFRecordInputWithTokenizerTest(tf.test.TestCase):\n\n  def setUp(self):\n    FLAGS.text_feature = 'comment'\n    ex = 
tf.train.Example(\n        features=tf.train.Features(\n            feature={\n                'label':\n                    tf.train.Feature(\n                        float_list=tf.train.FloatList(value=[0.8])),\n                'int_label':\n                    tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),\n                'comment':\n                    tf.train.Feature(\n                        bytes_list=tf.train.BytesList(\n                            value=['Hi there Bob'.encode('utf-8')]))\n            }))\n    self.ex_tensor = tf.convert_to_tensor(\n        ex.SerializeToString(), dtype=tf.string)\n\n    self.word_to_idx = {'Hi': 12, 'there': 13}\n    self.unknown_token = 999\n\n  def preprocessor(self, text):\n    return tf.py_func(\n        lambda t: np.asarray([\n            self.word_to_idx.get(x, self.unknown_token)\n            for x in t.decode('utf-8').split(' ')\n        ]), [text], tf.int64)\n\n  def test_TFRecordInputWithTokenizer_unrounded(self):\n    FLAGS.labels = 'label,fake_label,int_label,fake_int_label'\n    FLAGS.label_dtypes = 'float,float,int,int'\n    FLAGS.round_labels = False\n    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(\n        train_preprocess_fn=self.preprocessor)\n\n    with self.test_session():\n      features, labels = dataset_input._read_tf_example(self.ex_tensor)\n      self.assertEqual(\n          list(features[base_model.TOKENS_FEATURE_KEY].eval()), [12, 13, 999])\n      self.assertAlmostEqual(labels['label'].eval(), 0.8)\n      self.assertAlmostEqual(labels['fake_label'].eval(), 0.0)\n      self.assertAlmostEqual(labels['int_label'].eval(), 0.0)\n      self.assertAlmostEqual(labels['fake_int_label'].eval(), 0.0)\n      self.assertAlmostEqual(features['label_weight'].eval(), 1.0)\n      self.assertAlmostEqual(features['fake_label_weight'].eval(), 0.0)\n      self.assertAlmostEqual(features['int_label_weight'].eval(), 1.0)\n      
self.assertAlmostEqual(features['fake_int_label_weight'].eval(), 0.0)\n\n  def test_TFRecordInputWithTokenizer_default_values(self):\n    FLAGS.labels = 'label,fake_label'\n    FLAGS.round_labels = False\n    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(\n        train_preprocess_fn=self.preprocessor)\n\n    with self.test_session():\n      features, labels = dataset_input._read_tf_example(self.ex_tensor)\n      self.assertEqual(\n          list(features[base_model.TOKENS_FEATURE_KEY].eval()), [12, 13, 999])\n      self.assertAlmostEqual(labels['label'].eval(), 0.8)\n      self.assertAlmostEqual(labels['fake_label'].eval(), 0.0)\n      self.assertAlmostEqual(features['label_weight'].eval(), 1.0)\n      self.assertAlmostEqual(features['fake_label_weight'].eval(), 0.0)\n\n  def test_TFRecordInputWithTokenizer_rounded(self):\n    FLAGS.labels = 'label'\n    FLAGS.round_labels = True\n    dataset_input = tfrecord_input.TFRecordInputWithTokenizer(\n        train_preprocess_fn=self.preprocessor)\n\n    with self.test_session():\n      features, labels = dataset_input._read_tf_example(self.ex_tensor)\n      self.assertEqual(\n          list(features[base_model.TOKENS_FEATURE_KEY].eval()), [12, 13, 999])\n      self.assertEqual(labels['label'].eval(), 1.0)\n      self.assertEqual(features['label_weight'].eval(), 1.0)\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "experiments/tf_trainer/common/token_embedding_index.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Working with Token Embedding Indexes.\"\"\"\n\nfrom typing import Tuple, Dict, Optional, List, Callable\nimport numpy as np\nimport functools\nimport tensorflow as tf\n\ndef LoadTokenIdxEmbeddings(embeddings_path: str) \\\n  -> Tuple[Dict[str, int], np.ndarray, int, int]:\n  \"\"\"Generate word to idx mapping and word embeddings numpy array.\n\n  We have two levels of indirection (e.g. word to idx and then idx to\n  embedding) which could reduce embedding size if multiple words map to the\n  same idx; although this is not currently a real or useful use-case.\n\n  Args:\n    embeddings_path: Local, GCS, or HDFS path to embedding file. Each line\n      should be a word and its vector representation separated by a space.\n\n  Returns:\n    Tuple of:\n      A vocabulary dictionary (mapping words to their index)\n      A Numpy array of word embeddings with shape (vocab size, embedding size)\n      A unique unknown token index (greater than all other token indexes)\n      The size of the embeddings for words that is being used\n  \"\"\"\n  word_to_idx = {}\n  word_embeddings = []\n\n  if not tf.gfile.Exists(embeddings_path):\n    raise ValueError('File at %s does not exist.' 
% embeddings_path)\n\n  with tf.gfile.Open(embeddings_path) as f:\n    for idx, line in enumerate(f):\n      values = line.split()\n      word = values[0]\n      word_embedding = np.asarray(values[1:], dtype='float32')\n      word_to_idx[word] = idx + 1  # Reserve first row for padding\n      word_embeddings.append(word_embedding)\n\n  if not word_embeddings:\n    raise ValueError('No embeddings loaded from %s.' % embeddings_path)\n\n  # Add the padding \"embedding\"\n  word_embeddings.insert(0, np.random.randn(len(word_embeddings[0])))\n\n  # Convert embedding to numpy array and append the unknown word embedding,\n  # which is the mean of all other embeddings.\n  unknown_token = len(word_embeddings)\n  embeddings_matrix = np.asarray(word_embeddings, dtype=np.float32)\n  embeddings_matrix = np.append(\n      embeddings_matrix, [embeddings_matrix.mean(axis=0)], axis=0)\n\n  return word_to_idx, embeddings_matrix, unknown_token, len(word_embeddings[0])\n"
  },
  {
    "path": "experiments/tf_trainer/common/token_embedding_index_test.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Tests for token_embedding_index.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom tf_trainer.common.token_embedding_index import LoadTokenIdxEmbeddings\n\n\nclass LoadTokenIdxEmbeddingsTest(tf.test.TestCase):\n\n  def test_LoadTokenIdxEmbeddings(self):\n    idx, embeddings, unknown_idx, embedding_size = LoadTokenIdxEmbeddings(\n        'testdata/cats_and_dogs_onehot.vocab.txt')\n    self.assertEqual(embedding_size, 6)\n    self.assertEqual(unknown_idx, 7)\n    self.assertEqual(idx['dogs'], 1)\n    self.assertEqual(idx['cats'], 2)\n    self.assertEqual(idx['not'], 6)\n    self.assertEqual(embeddings[1][0], 1.0)\n    self.assertEqual(embeddings[1][1], 0.0)\n    # Note: padding embedding will be random, and is index 0. Also the unknown\n    # token embedding will be random, and is index n+1; 7 in this case.\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "experiments/tf_trainer/common/types.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Types for the tf_trainer module.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom typing import NewType, Union, Dict, Tuple\n\n# Type aliases for convenience.\n\nTensor = Union[tf.Tensor, tf.SparseTensor]\nTensorDict = Dict[str, Tensor]\nTensorOrTensorDict = Union[tf.Tensor, TensorDict]\nFeatureAndLabelTensors = Tuple[TensorOrTensorDict, TensorOrTensorDict]\n\n# See: https://www.tensorflow.org/api_docs/python/tf/estimator/TrainSpec\nEstimatorInput = Union[FeatureAndLabelTensors, tf.data.Dataset]\n"
  },
  {
    "path": "experiments/tf_trainer/common/v100_config.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: CUSTOM\n  masterType: standard_v100\n"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/hparam_config.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  # scaleTier: CUSTOM\n  # masterType: standard\n  # workerType: standard_gpu\n  # parameterServerType: large_model\n  # workerCount: 1\n  # parameterServerCount: 1\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/toxicity # TODO: change based on dataset\n    maxTrials: 120\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 1\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: INTEGER\n        minValue: 16\n        maxValue: 256\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/hparam_config_civil_comments.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/toxicity\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n        - 256\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/hparam_config_many_communities.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/removed\n    maxTrials: 150\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n        - 256\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/hparam_config_toxicity.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/frac_neg\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/model.py",
    "content": "\"\"\"Tensorflow Estimator Character CNN.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tensorflow.python.keras import layers\nfrom tf_trainer.common import base_model\nfrom typing import Set\n\nFLAGS = tf.app.flags.FLAGS\n\n# Hyperparameters\n# TODO: Add validation\ntf.app.flags.DEFINE_float('learning_rate', 0.0001,\n                          'The learning rate to use during training.')\ntf.app.flags.DEFINE_float('dropout_rate', 0.25,\n                          'The dropout rate to use during training.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning.\n# TODO: add link to relevant public issue/bug/documentation?\ntf.app.flags.DEFINE_string(\n    'filter_sizes', '5,7,9,11',\n    'Comma delimited string for the sizes of convolution filters.')\ntf.app.flags.DEFINE_integer(\n    'num_filters', 16,\n    'Number of convolutional filters for every convolutional layer.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning.\n# TODO: add link to relevant public issue/bug/documentation?\ntf.app.flags.DEFINE_string(\n    'dense_units', '256,512,128',\n    'Comma delimited string for the number of hidden units in the dense layer.')\ntf.app.flags.DEFINE_integer(\n    'embedding_size', 32,\n    'The number of dimensions in the character embedding.')\ntf.app.flags.DEFINE_string('pooling_type', 'max', 'Average or max pooling.')\ntf.app.flags.DEFINE_integer('string_len', 1500,\n                            'The length to truncate or pad to.')\n\n\nclass TFCharCNNModel(base_model.BaseModel):\n  \"\"\"TF Character CNN Model\n\n  TF implementation of a Character CNN. 
Inputs should be strings.\n  \"\"\"\n\n  def __init__(self, target_labels: Set[str]) -> None:\n    self._target_labels = target_labels\n\n  @staticmethod\n  def hparams():\n    filter_sizes = [int(units) for units in FLAGS.filter_sizes.split(',')]\n    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]\n    hparams = tf.contrib.training.HParams(\n        learning_rate=FLAGS.learning_rate,\n        dropout_rate=FLAGS.dropout_rate,\n        filter_sizes=filter_sizes,\n        num_filters=FLAGS.num_filters,\n        dense_units=dense_units,\n        embedding_size=FLAGS.embedding_size,\n        pooling_type=FLAGS.pooling_type,\n        string_len=FLAGS.string_len)\n    return hparams\n\n  def estimator(self, model_dir):\n    estimator = tf.estimator.Estimator(\n        model_fn=self._model_fn,\n        params=self.hparams(),\n        config=tf.estimator.RunConfig(model_dir=model_dir))\n    return estimator\n\n  def _model_fn(self, features, labels, mode, params, config):\n    embedding = tf.Variable(\n        tf.truncated_normal([256, params.embedding_size]),\n        name='char_embedding')\n    texts = features[base_model.TEXT_FEATURE_KEY]\n    batch_size = tf.shape(texts)[0]\n    byte_ids = tf.reshape(\n        tf.cast(\n            tf.decode_raw(\n                tf.sparse_tensor_to_dense(\n                    tf.string_split(texts, ''), default_value='\\0'), tf.uint8),\n            tf.int32), [batch_size, -1])\n    padded_ids = tf.slice(\n        tf.concat(\n            [byte_ids,\n             tf.zeros([batch_size, params.string_len], tf.int32)],\n            axis=1), [0, 0], [batch_size, params.string_len])\n\n    inputs = tf.nn.embedding_lookup(params=embedding, ids=padded_ids)\n\n    # Conv\n    X = inputs\n    for filter_size in params.filter_sizes:\n      X = layers.Conv1D(\n          params.num_filters, filter_size, activation='relu', padding='same')(\n              X)\n    if params.pooling_type == 'average':\n      X = 
layers.GlobalAveragePooling1D()(X)\n    elif params.pooling_type == 'max':\n      X = layers.GlobalMaxPooling1D()(X)\n    else:\n      raise ValueError('Unrecognized pooling type parameter')\n\n    # FC\n    logits = X\n    for num_units in params.dense_units:\n      logits = tf.layers.dense(\n          inputs=logits, units=num_units, activation=tf.nn.relu)\n      logits = tf.layers.dropout(logits, rate=params.dropout_rate)\n\n    logits = tf.layers.dense(\n        inputs=logits, units=len(self._target_labels), activation=None)\n\n    output_heads = [\n        tf.contrib.estimator.binary_classification_head(name=name)\n        for name in self._target_labels\n    ]\n    multihead = tf.contrib.estimator.multi_head(output_heads)\n\n    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)\n    return multihead.create_estimator_spec(\n        features=features,\n        labels=labels,\n        mode=mode,\n        logits=logits,\n        optimizer=optimizer)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/run.deploy.sh",
    "content": "#!/bin/bash\n# Deploys a saved model on Cloud MLE.\n\nif [ \"$1\" == \"civil_comments\" ] || [ \"$1\" == \"toxicity\" ] || [ \"$1\" == \"many_communities\" ] ; then\n    \n    MODEL_NAME=tf_char_cnn_$1\n\nelse\n    echo \"First positional arg must be one of civil_comments, toxicity, many_communities.\"\n    exit 1\nfi\n\n\n# By default, the model is the last one from the user.\nMODEL_SAVED_PATH=$(gcloud storage ls gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/ | tail -1)\n\n# Create a new model.\n# Will raise an error if the model already exists.\ngcloud ml-engine models create $MODEL_NAME \\\n  --regions us-central1\n\n# Deploy a model version.\nMODEL_VERSION=v_$(date +\"%Y%m%d_%H%M%S\")\ngcloud ml-engine versions create $MODEL_VERSION \\\n  --model $MODEL_NAME \\\n  --origin $MODEL_SAVED_PATH \\\n  --runtime-version 1.10\n"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/run.hyperparameter.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_char_cnn\"\nMODEL_NAME_DATA=${MODEL_NAME}_$1\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.10 \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    --config=\"tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml\" \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --embedding_size=300 \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --is_embedding_trainable=False \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False\n\necho \"Model dir:\"\necho ${JOB_DIR}/model_dir\n"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/run.local.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\n\npython -m tf_trainer.tf_char_cnn.run \\\n  --train_path=$train_path \\\n  --validate_path=$valid_path \\\n  --model_dir=\"tf_char_cnn_local_model_dir\" \\\n  --labels=$labels \\\n  --label_dtypes=$label_dtypes\n"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/run.ml_engine.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_char_cnn\"\nMODEL_NAME_DATA=${MODEL_NAME}_$1_glove\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.10 \\\n    --scale-tier 'BASIC_GPU' \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --python-version \"3.5\" \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --is_embedding_trainable=False \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False \\\n    --batch_size=32\n\necho \"Model dir:\"\necho ${JOB_DIR}/model_dir\n"
  },
  {
    "path": "experiments/tf_trainer/tf_char_cnn/run.py",
    "content": "\"\"\"Experiments with toxicity, civil_comments, many_communities datasets.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.tf_char_cnn import model as tf_char_cnn\n\nFLAGS = tf.app.flags.FLAGS\n\n\ndef main(argv):\n  del argv  # unused\n\n  dataset = tfrecord_input.TFRecordInput()\n\n  model = tf_char_cnn.TFCharCNNModel(dataset.labels())\n\n  trainer = model_trainer.ModelTrainer(dataset, model)\n  trainer.train_with_eval()\n\n  serving_input_fn = serving_input.create_text_serving_input_fn(\n      text_feature_name=base_model.TEXT_FEATURE_KEY,\n      example_key_name=base_model.EXAMPLE_KEY)\n  trainer.export(serving_input_fn, base_model.EXAMPLE_KEY,\n    metrics_key=\"auc/%s\" % FLAGS.labels.split(',')[0])\n\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/finetune.py",
    "content": "\"\"\"Experiments with many_communities dataset.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport nltk\nimport os\nimport pandas as pd\nimport tensorflow as tf\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import text_preprocessor\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.common import types\nfrom tf_trainer.tf_cnn import model as tf_cnn\n\nfrom tensorflow.python.lib.io import file_io\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string(\"embeddings_path\",\n                           \"local_data/glove.6B/glove.6B.100d.txt\",\n                           \"Path to the embeddings file.\")\n\ntf.app.flags.DEFINE_string(\"tmp_results_path\", None,\n                           \"Path to the local combined (across communities) results file.\")\n\ntf.app.flags.mark_flag_as_required(\"warm_start_from\")\ntf.app.flags.mark_flag_as_required(\"tmp_results_path\")\n\ndef main(argv):\n  del argv  # unused\n\n  embeddings_path = FLAGS.embeddings_path\n\n  preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)\n\n  nltk.download(\"punkt\")\n  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)\n  dataset = tfrecord_input.TFRecordInputWithTokenizer(\n      train_preprocess_fn=train_preprocess_fn)\n\n  # TODO: Move embedding *into* Keras model.\n  model_tf = tf_cnn.TFCNNModel(dataset.labels())\n  model = preprocessor.add_embedding_to_model(model_tf,\n                                              base_model.TOKENS_FEATURE_KEY)\n\n  trainer = model_trainer.ModelTrainer(dataset, model,\n    warm_start_from=FLAGS.warm_start_from)\n  trainer.train_with_eval()\n\n  keys = [(\"label\", \"probabilities\")]\n  predictions = list(trainer.predict_on_dev(predict_keys=keys))\n\n  valid_path_csv = 
FLAGS.validate_path.replace(\"..tfrecord\", \".csv\")\n  df = pd.read_csv(valid_path_csv)\n  labels = df[\"label\"].values\n  community = os.path.basename(FLAGS.validate_path).split(\"..\")[0]\n\n  assert len(labels) == len(predictions), \\\n    \"Labels and predictions must have the same length.\"\n\n  d = {\n    \"label\" : labels,\n    \"prediction\": [p[keys[0]][1] for p in predictions],\n    \"community\": [community for p in predictions],\n  }\n\n  df = pd.DataFrame(data=d)\n  df.to_csv(path_or_buf=FLAGS.tmp_results_path, mode='a+',\n    index=False, header=False)\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/finetune.sh",
    "content": "#!/bin/bash\n\nBASE_PATH=\"gs://conversationai-models\"\nGCS_RESOURCES=\"${BASE_PATH}/resources\"\n\nwarm_start_from=\"gs://conversationai-models/tf_trainer_runs/msushkov/tf_cnn_many_communities_40_per_8_shot_glove/20190723_110543/model_dir\"\neval_steps=1\neval_period=5\n\nlabels=\"label\"\nlabel_dtypes=\"int\"\ntext_feature=\"text\"\n\nbatch_size=24\ndense_units=\"64,64\"\nfilter_sizes=\"3,4,5\"\nnum_filters=128\ndropout_rate=0.33976339995062715\npooling_type=\"max\"\n\nif [ \"$1\" == \"test\" ]; then\n\tVALIDATION_OR_TEST=\"test\"\n\n\t# Best hparams found on the validation set\n\tlearning_rate_lst=(0.00035725183171118115)\n\ttrain_steps_lst=(5)\n\nelse\n\tVALIDATION_OR_TEST=\"validation\"\n\n    # original, original/2, original/5, original/10, original*2\n\tlearning_rate_lst=(0.00035725183171118115 0.00017862591 0.00007145036 0.000035725183171118115 0.00071450366)\n\ttrain_steps_lst=(5 10 50)\nfi\n\ncombined_results_dir=\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/$VALIDATION_OR_TEST\"\ntrain_dir=\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/${VALIDATION_OR_TEST}_episodes/support/*.tfrecord\"\n\nfor learning_rate in \"${learning_rate_lst[@]}\"; do\n\techo \"Learning rate: $learning_rate\"\n\n\tfor train_steps in \"${train_steps_lst[@]}\"; do\n\t\techo \"Train steps: $train_steps\"\n\n\t\ttmp_results_fname=\"tf_cnn_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv\"\n\t\ttmp_results_path=\"/tmp/$tmp_results_fname\"\n\n\t\trm $tmp_results_path\n\n\t\tCOUNTER=0\n\t\tfor train_path in `gcloud storage ls $train_dir`; do\n\t\t\techo \"Community $COUNTER out of 170...\"\n\t\t\t\n\t\t\tvalid_path=${train_path/${VALIDATION_OR_TEST}_episodes\\/support/${VALIDATION_OR_TEST}_episodes\\/query}\n\n\t\t\trm -rf \"tf_cnn_local_model_dir\"\n\n\t\t\tpython -m tf_trainer.tf_cnn.finetune \\\n\t\t\t    
--model_dir=\"tf_cnn_local_model_dir\" \\\n\t\t\t    --train_path=$train_path \\\n\t\t\t    --validate_path=$valid_path \\\n\t\t\t    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt\" \\\n\t\t\t    --is_embedding_trainable=False \\\n\t\t\t    --train_steps=$train_steps \\\n\t\t\t    --eval_period=$eval_period \\\n\t\t\t    --eval_steps=$eval_steps \\\n\t\t\t    --labels=$labels \\\n\t\t\t    --label_dtypes=$label_dtypes \\\n\t\t\t    --preprocess_in_tf=False \\\n\t\t\t    --batch_size=$batch_size \\\n\t\t\t    --dense_units=$dense_units \\\n\t\t\t    --filter_sizes=$filter_sizes \\\n\t\t\t    --num_filters=$num_filters \\\n\t\t\t    --dropout_rate=$dropout_rate \\\n\t\t\t    --learning_rate=$learning_rate \\\n\t\t\t    --pooling_type=$pooling_type \\\n\t\t\t    --text_feature=$text_feature \\\n\t\t\t    --warm_start_from=$warm_start_from \\\n\t\t\t    --tmp_results_path=$tmp_results_path\n\n\t\t\tCOUNTER=$[$COUNTER +1]\n\t\tdone\n\n\t\tgcloud storage cp $tmp_results_path $combined_results_dir\n\n\tdone\ndone"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/hparam_config.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  # scaleTier: CUSTOM\n  # masterType: standard\n  # workerType: standard_gpu\n  # parameterServerType: large_model\n  # workerCount: 1\n  # parameterServerCount: 1\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/toxicity # TODO: change based on dataset\n    maxTrials: 120\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 1\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: INTEGER\n        minValue: 16\n        maxValue: 256\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/hparam_config_civil_comments.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/toxicity\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n        - 256\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/hparam_config_many_communities.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/removed\n    maxTrials: 150\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n        - 256\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/hparam_config_many_communities_40_per_8_shot.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/label\n    maxTrials: 150\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/hparam_config_toxicity.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/frac_neg\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: filter_sizes \n        type: CATEGORICAL \n        categoricalValues:\n        - '5,5'\n        - '3,4,5'\n      - parameterName: num_filters\n        type: DISCRETE\n        discreteValues:\n        - 64\n        - 128\n      - parameterName: dense_units \n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: pooling_type \n        type: CATEGORICAL\n        categoricalValues:\n        - 'average'\n        - 'max'"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/model.py",
    "content": "\"\"\"Tensorflow Estimator CNN.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tensorflow.python.keras import layers\nfrom tf_trainer.common import base_model\nfrom typing import Set\n\nFLAGS = tf.app.flags.FLAGS\n\n# Hyperparameters\n# TODO: Add validation\ntf.app.flags.DEFINE_float('learning_rate', 0.00003,\n                          'The learning rate to use during training.')\ntf.app.flags.DEFINE_float('dropout_rate', 0.3,\n                          'The dropout rate to use during training.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning.\n# TODO: add link to relevant public issue/bug/documentation?\ntf.app.flags.DEFINE_string(\n    'filter_sizes', '5',\n    'Comma delimited string for the sizes of convolution filters.')\ntf.app.flags.DEFINE_integer(\n    'num_filters', 128,\n    'Number of convolutional filters for every convolutional layer.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning.\n# TODO: add link to relevant public issue/bug/documentation?\ntf.app.flags.DEFINE_string(\n    'dense_units', '128',\n    'Comma delimited string for the number of hidden units in the dense layer.')\ntf.app.flags.DEFINE_integer('embedding_size', 300,\n                            'The number of dimensions in the word embedding.')\ntf.app.flags.DEFINE_string('pooling_type', 'average', 'Average or max pooling.')\n\n\nclass TFCNNModel(base_model.BaseModel):\n  \"\"\"TF CNN Model\n\n  TF implementation of a CNN. 
Inputs should be\n  sequences of word embeddings.\n  \"\"\"\n\n  def __init__(self, target_labels: Set[str]) -> None:\n    self._target_labels = target_labels\n\n  @staticmethod\n  def hparams():\n    filter_sizes = [int(units) for units in FLAGS.filter_sizes.split(',')]\n    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]\n    hparams = tf.contrib.training.HParams(\n        learning_rate=FLAGS.learning_rate,\n        dropout_rate=FLAGS.dropout_rate,\n        filter_sizes=filter_sizes,\n        num_filters=FLAGS.num_filters,\n        dense_units=dense_units,\n        embedding_size=FLAGS.embedding_size,\n        pooling_type=FLAGS.pooling_type)\n    return hparams\n\n  def estimator(self, model_dir):\n    estimator = tf.estimator.Estimator(\n        model_fn=self._model_fn,\n        params=self.hparams(),\n        config=tf.estimator.RunConfig(model_dir=model_dir))\n    return estimator\n\n  def _model_fn(self, features, labels, mode, params, config):\n    inputs = features[base_model.TOKENS_FEATURE_KEY]\n    batch_size = tf.shape(inputs)[0]\n\n    # Conv\n    X = inputs\n    for filter_size in params.filter_sizes:\n      X = layers.Conv1D(\n          params.num_filters, filter_size, activation='relu', padding='same')(\n              X)\n    if params.pooling_type == 'average':\n      X = layers.GlobalAveragePooling1D()(X)\n    elif params.pooling_type == 'max':\n      X = layers.GlobalMaxPooling1D()(X)\n    else:\n      raise ValueError('Unrecognized pooling type parameter')\n\n    # FC\n    logits = X\n    for num_units in params.dense_units:\n      logits = tf.layers.dense(\n          inputs=logits, units=num_units, activation=tf.nn.relu)\n      logits = tf.layers.dropout(logits, rate=params.dropout_rate)\n\n    logits = tf.layers.dense(\n        inputs=logits, units=len(self._target_labels), activation=None)\n\n    output_heads = [\n        tf.contrib.estimator.binary_classification_head(name=name)\n        for name in self._target_labels\n 
   ]\n    multihead = tf.contrib.estimator.multi_head(output_heads)\n\n    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)\n    return multihead.create_estimator_spec(\n        features=features,\n        labels=labels,\n        mode=mode,\n        logits=logits,\n        optimizer=optimizer)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/run.deploy.sh",
    "content": "#!/bin/bash\n# Deploys a saved model on Cloud MLE.\n\nif [ \"$1\" == \"civil_comments\" ] || [ \"$1\" == \"toxicity\" ] || [ \"$1\" == \"many_communities\" ] ; then\n    \n    MODEL_NAME=tf_cnn_$1_glove\n\nelse\n    echo \"First positional arg must be one of civil_comments, toxicity, many_communities.\"\n    exit 1\nfi\n\n\n# By default, the model is the last one from the user.\nMODEL_SAVED_PATH=$(gcloud storage ls gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/ | tail -1)\n\n# Create a new model.\n# Will raise an error if the model already exists.\ngcloud ml-engine models create $MODEL_NAME \\\n  --regions us-central1\n\n# Deploy a model version.\nMODEL_VERSION=v_$(date +\"%Y%m%d_%H%M%S\")\ngcloud ml-engine versions create $MODEL_VERSION \\\n  --model $MODEL_NAME \\\n  --origin $MODEL_SAVED_PATH \\\n  --runtime-version 1.10\n"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/run.hyperparameter.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_cnn\"\nMODEL_NAME_DATA=${MODEL_NAME}_$1_glove\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.12 \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    --config=\"tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml\" \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt\" \\\n    --embedding_size=300 \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --is_embedding_trainable=False \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False \\\n    --text_feature=$text_feature\n\necho \"Model dir:\"\necho ${JOB_DIR}/model_dir\n"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/run.local.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\n\npython -m tf_trainer.tf_cnn.run \\\n  --train_path=$train_path \\\n  --validate_path=$valid_path \\\n  --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt\" \\\n  --model_dir=\"tf_cnn_local_model_dir\" \\\n  --labels=$labels \\\n  --label_dtypes=$label_dtypes\n"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/run.ml_engine.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_cnn\"\nMODEL_NAME_DATA=${MODEL_NAME}_$1_glove\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\nif [ \"$1\" == \"civil_comments\" ]; then\n    batch_size=128\n    dense_units='128,128'\n    filter_sizes='3,4,5'\n    num_filters=128\n    dropout_rate=0.01527361736403272\n    learning_rate=0.0001932910006772403\n    pooling_type='average'\n    train_steps=50000\n    eval_period=1000\n    eval_steps=2000\n\nelif [ \"$1\" == \"toxicity\" ]; then\n    batch_size=128\n    dense_units='64'\n    filter_sizes='3,4,5'\n    num_filters=128\n    dropout_rate=0.59761635967002524\n    learning_rate=0.00028233147441192243\n    pooling_type='max'\n    train_steps=55000\n    eval_period=1000\n    eval_steps=1500\n\nelif [ \"$1\" == \"many_communities\" ]; then\n    batch_size=128\n    dense_units='128,128'\n    filter_sizes='3,4,5'\n    num_filters=128\n    dropout_rate=0.42090135248508892\n    learning_rate=8.8262915612024245e-05\n    pooling_type='average'\n    train_steps=700000\n    eval_period=4000\n    eval_steps=45000\n\nelif [ \"$1\" == \"many_communities_40_per_8_shot\" ]; then\n\n    train_steps=8000\n    eval_steps=250\n    eval_period=200\n\n    if [ \"$2\" == \"optimistic\" ]; then\n\n        batch_size=64\n        dense_units='64'\n        filter_sizes='3,4,5'\n        num_filters=128\n        dropout_rate=0.50444323963758519\n        learning_rate=0.00016448334200861331\n        pooling_type='max'\n\n    elif [ \"$2\" == \"pessimistic\" ]; then\n        \n        batch_size=32\n        dense_units='64,64'\n        filter_sizes='3,4,5'\n        num_filters=128\n        dropout_rate=0.33976339995062715\n        learning_rate=0.00035725183171118115\n        pooling_type='max'\n\n    else\n        echo \"Must provide second positional argument.\"\n        exit 1\n    fi\n\nelse\n    echo \"First positional arg must be 
one of civil_comments, toxicity, many_communities, many_communities_40_per_8_shot.\"\n    exit 1\nfi\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.10 \\\n    --scale-tier 'BASIC_GPU' \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --python-version \"3.5\" \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt\" \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --is_embedding_trainable=False \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False \\\n    --batch_size=$batch_size \\\n    --dense_units=$dense_units \\\n    --filter_sizes=$filter_sizes \\\n    --num_filters=$num_filters \\\n    --dropout_rate=$dropout_rate \\\n    --learning_rate=$learning_rate \\\n    --pooling_type=$pooling_type \\\n    --text_feature=$text_feature\n\necho \"Model dir:\"\necho ${JOB_DIR}/model_dir\n"
  },
  {
    "path": "experiments/tf_trainer/tf_cnn/run.py",
    "content": "\"\"\"Experiments with toxicity, civil_comments, many_communities datasets.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport nltk\nimport tensorflow as tf\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import text_preprocessor\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.common import types\nfrom tf_trainer.tf_cnn import model as tf_cnn\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string(\"embeddings_path\",\n                           \"local_data/glove.6B/glove.6B.100d.txt\",\n                           \"Path to the embeddings file.\")\n\ndef main(argv):\n  del argv  # unused\n\n  embeddings_path = FLAGS.embeddings_path\n\n  preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)\n\n  nltk.download(\"punkt\")\n  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)\n  dataset = tfrecord_input.TFRecordInputWithTokenizer(\n      train_preprocess_fn=train_preprocess_fn)\n\n  # TODO: Move embedding *into* Keras model.\n  model_tf = tf_cnn.TFCNNModel(dataset.labels())\n  model = preprocessor.add_embedding_to_model(model_tf,\n                                              base_model.TOKENS_FEATURE_KEY)\n\n  trainer = model_trainer.ModelTrainer(dataset, model)\n  trainer.train_with_eval()\n\n  serving_input_fn = serving_input.create_serving_input_fn(\n      word_to_idx=preprocessor._word_to_idx,\n      unknown_token=preprocessor._unknown_token,\n      text_feature_name=base_model.TOKENS_FEATURE_KEY,\n      example_key_name=base_model.EXAMPLE_KEY)\n  trainer.export(serving_input_fn, base_model.EXAMPLE_KEY,\n    metrics_key=\"auc/%s\" % FLAGS.labels.split(',')[0])\n\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/finetune.py",
    "content": "\"\"\"Experiments with many_communities dataset.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport nltk\nimport os\nimport pandas as pd\nimport tensorflow as tf\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import text_preprocessor\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.common import types\nfrom tf_trainer.tf_gru_attention import model as tf_gru_attention\n\nfrom tensorflow.python.lib.io import file_io\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string(\"embeddings_path\",\n                           \"local_data/glove.6B/glove.6B.100d.txt\",\n                           \"Path to the embeddings file.\")\n\ntf.app.flags.DEFINE_string(\"tmp_results_path\", None,\n                           \"Path to the local combined (across communities) results file.\")\n\ntf.app.flags.mark_flag_as_required(\"warm_start_from\")\ntf.app.flags.mark_flag_as_required(\"tmp_results_path\")\n\ndef main(argv):\n  del argv  # unused\n\n  embeddings_path = FLAGS.embeddings_path\n\n  preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)\n\n  nltk.download(\"punkt\")\n  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)\n  dataset = tfrecord_input.TFRecordInputWithTokenizer(\n      train_preprocess_fn=train_preprocess_fn)\n\n  # TODO: Move embedding *into* Keras model.\n  model_tf = tf_gru_attention.TFRNNModel(dataset.labels())\n  model = preprocessor.add_embedding_to_model(model_tf,\n                                              base_model.TOKENS_FEATURE_KEY)\n\n  trainer = model_trainer.ModelTrainer(dataset, model,\n    warm_start_from=FLAGS.warm_start_from)\n  trainer.train_with_eval()\n\n  keys = [(\"label\", \"probabilities\")]\n  predictions = list(trainer.predict_on_dev(predict_keys=keys))\n\n  valid_path_csv = 
FLAGS.validate_path.replace(\"..tfrecord\", \".csv\")\n  df = pd.read_csv(valid_path_csv)\n  labels = df[\"label\"].values\n  community = os.path.basename(FLAGS.validate_path).split(\"..\")[0]\n\n  assert len(labels) == len(predictions), \\\n    \"Labels and predictions must have the same length.\"\n\n  d = {\n    \"label\" : labels,\n    \"prediction\": [p[keys[0]][1] for p in predictions],\n    \"community\": [community for p in predictions],\n  }\n\n  df = pd.DataFrame(data=d)\n  df.to_csv(path_or_buf=FLAGS.tmp_results_path, mode='a+',\n    index=False, header=False)\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/finetune.sh",
    "content": "#!/bin/bash\n\nBASE_PATH=\"gs://conversationai-models\"\nGCS_RESOURCES=\"${BASE_PATH}/resources\"\n\nwarm_start_from=\"gs://conversationai-models/tf_trainer_runs/msushkov/tf_gru_attention_many_communities_40_per_8_shot_glove/20190723_110533/model_dir\"\neval_steps=1\neval_period=5\n\nlabels=\"label\"\nlabel_dtypes=\"int\"\ntext_feature=\"text\"\n    \nbatch_size=24\nattention_units=64\ndropout_rate=0.052541994248873507\ndense_units='128,128'\ngru_units='128'\n\nif [ \"$1\" == \"test\" ]; then\n\tVALIDATION_OR_TEST=\"test\"\n\n\t# Best hparams found on the validation set\n\tlearning_rate_lst=(0.000049418814574477758)\n\ttrain_steps_lst=(50)\n\nelse\n\tVALIDATION_OR_TEST=\"validation\"\n\n\t# original, original/2, original/5, original/10, original*2, original/20, original/50\n\t#learning_rate_lst=(0.00049418814574477758 0.00024709407 0.00009883762 0.000049418814574477758 0.00098837629 0.0000247094 0.00000988376)\n\n\t# original*4, original*10, original*20\n\tlearning_rate_lst=(0.00197675258 0.0049418814574477758 0.00988376291)\n\ttrain_steps_lst=(5 10 50 100)\nfi\n\ncombined_results_dir=\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/$VALIDATION_OR_TEST\"\ntrain_dir=\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/${VALIDATION_OR_TEST}_episodes/support/*.tfrecord\"\n\nfor learning_rate in \"${learning_rate_lst[@]}\"; do\n\techo \"Learning rate:\"\n\techo $learning_rate\n\n\tfor train_steps in \"${train_steps_lst[@]}\"; do\n\t\techo \"Train steps:\"\n\t\techo $train_steps\n\n\t\ttmp_results_fname=\"tf_gru_attention_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv\"\n\t\ttmp_results_path=\"/tmp/$tmp_results_fname\"\n\n\t\trm $tmp_results_path\n\n\t\tCOUNTER=0\n\t\tfor train_path in `gcloud storage ls $train_dir`; 
do\n\t\t\t\n\t\t\tvalid_path=${train_path/${VALIDATION_OR_TEST}_episodes\\/support/${VALIDATION_OR_TEST}_episodes\\/query}\n\n\t\t\trm -rf \"tf_gru_attention_local_model_dir\"\n\n\t\t\tpython -m tf_trainer.tf_gru_attention.finetune \\\n\t\t\t    --model_dir=\"tf_gru_attention_local_model_dir\" \\\n\t\t\t    --train_path=$train_path \\\n\t\t\t    --validate_path=$valid_path \\\n\t\t\t    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt\" \\\n\t\t\t    --is_embedding_trainable=False \\\n\t\t\t    --train_steps=$train_steps \\\n\t\t\t    --eval_period=$eval_period \\\n\t\t\t    --eval_steps=$eval_steps \\\n\t\t\t    --labels=$labels \\\n\t\t\t    --label_dtypes=$label_dtypes \\\n\t\t\t    --preprocess_in_tf=False \\\n\t\t\t    --batch_size=$batch_size \\\n\t\t\t    --attention_units=$attention_units \\\n\t\t\t    --dropout_rate=$dropout_rate \\\n\t\t\t    --learning_rate=$learning_rate \\\n\t\t\t    --dense_units=$dense_units \\\n\t\t\t    --gru_units=$gru_units \\\n\t\t\t    --text_feature=$text_feature \\\n\t\t\t    --warm_start_from=$warm_start_from \\\n\t\t\t    --tmp_results_path=$tmp_results_path\n\n\t\t\tCOUNTER=$[$COUNTER +1]\n\t\tdone\n\n\t\tgcloud storage cp $tmp_results_path $combined_results_dir\n\n\tdone\ndone"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/hparam_config.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: CUSTOM\n  masterType: standard\n  workerType: standard_gpu\n  parameterServerType: large_model\n  workerCount: 1\n  parameterServerCount: 1\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/frac_neg\n    maxTrials: 40\n    maxParallelTrials: 4\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 1\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: INTEGER\n        minValue: 16\n        maxValue: 128\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: gru_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '256'            \n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: attention_units\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n        - 128\n        - 256\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/hparam_config_civil_comments.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/toxicity\n    maxTrials: 200\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n      - parameterName: gru_units\n        type: CATEGORICAL\n        categoricalValues:          \n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: attention_units\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n        - 128\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/hparam_config_many_communities.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/removed\n    maxTrials: 200\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n      - parameterName: gru_units\n        type: CATEGORICAL\n        categoricalValues:         \n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: attention_units\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n        - 128\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/hparam_config_many_communities_40_per_8_shot.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/label\n    maxTrials: 200\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n      - parameterName: gru_units\n        type: CATEGORICAL\n        categoricalValues:         \n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: attention_units\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n        - 128\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/hparam_config_toxicity.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/frac_neg\n    maxTrials: 200\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n      - parameterName: gru_units\n        type: CATEGORICAL\n        categoricalValues:           \n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'\n      - parameterName: attention_units\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n        - 128\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '64'\n        - '64,64'"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/model.py",
    "content": "\"\"\"Tensorflow Estimator implementation of RNN Model with Attention\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tf_trainer.common import base_model\nfrom typing import Set\n\nFLAGS = tf.app.flags.FLAGS\n\n# Hyperparameters\n# TODO: Add validation\ntf.app.flags.DEFINE_float('learning_rate', 0.00003,\n                          'The learning rate to use during training.')\ntf.app.flags.DEFINE_float('dropout_rate', 0.3,\n                          'The dropout rate to use during training.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning.\ntf.app.flags.DEFINE_string(\n    'gru_units', '128',\n    'Comma delimited string for the number of hidden units in the gru layer.')\ntf.app.flags.DEFINE_integer('attention_units', 64,\n                            'The number of hidden units in the attention layer.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning.\ntf.app.flags.DEFINE_string(\n    'dense_units', '128',\n    'Comma delimited string for the number of hidden units in the dense layer.')\n\n\ndef attend(inputs, attention_size, attention_depth=1):\n  \"\"\"Attention layer.\"\"\"\n\n  sequence_length = tf.shape(inputs)[1]  # dynamic\n  final_layer_size = inputs.shape[2]  # static\n\n  x = tf.reshape(inputs, [-1, final_layer_size])\n  for _ in range(attention_depth - 1):\n    x = tf.layers.dense(x, attention_size, activation=tf.nn.relu)\n  x = tf.layers.dense(x, 1, activation=None)\n  logits = tf.reshape(x, [-1, sequence_length, 1])\n  alphas = tf.nn.softmax(logits, dim=1)\n\n  output = tf.reduce_sum(inputs * alphas, 1)\n\n  return output, alphas\n\n\nclass TFRNNModel(base_model.BaseModel):\n\n  def __init__(self, target_labels: Set[str]) -> None:\n    self._target_labels = target_labels\n\n  
@staticmethod\n  def hparams():\n    gru_units = [int(units) for units in FLAGS.gru_units.split(',')]\n    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]\n    hparams = tf.contrib.training.HParams(\n        learning_rate=FLAGS.learning_rate,\n        dropout_rate=FLAGS.dropout_rate,\n        gru_units=gru_units,\n        attention_units=FLAGS.attention_units,\n        dense_units=dense_units)\n    return hparams\n\n  def estimator(self, model_dir):\n    estimator = tf.estimator.Estimator(\n        model_fn=self._model_fn,\n        params=self.hparams(),\n        config=tf.estimator.RunConfig(model_dir=model_dir))\n    return estimator\n\n  def _model_fn(self, features, labels, mode, params, config):\n    inputs = features[base_model.TOKENS_FEATURE_KEY]\n    batch_size = tf.shape(inputs)[0]\n\n    rnn_layers = [\n        tf.nn.rnn_cell.GRUCell(num_units=size, activation=tf.nn.tanh)\n        for size in params.gru_units\n    ]\n\n    # create a RNN cell composed sequentially of a number of RNNCells\n    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)\n\n    # TODO: make bidirectional\n    outputs, states = tf.nn.dynamic_rnn(\n        multi_rnn_cell, inputs, dtype=tf.float32)\n\n    # TODO: Handle sequence length in the attention layer (via a mask).\n    #       Padded elements should not be part of the average.\n    logits, _ = attend(inputs=outputs, attention_size=params.attention_units)\n\n    for num_units in params.dense_units:\n      logits = tf.layers.dense(\n          inputs=logits, units=num_units, activation=tf.nn.relu)\n      logits = tf.layers.dropout(logits, rate=params.dropout_rate)\n    logits = tf.layers.dense(\n        inputs=logits, units=len(self._target_labels), activation=None)\n\n    output_heads = [\n        tf.contrib.estimator.binary_classification_head(name=name)\n        for name in self._target_labels\n    ]\n    multihead = tf.contrib.estimator.multi_head(output_heads)\n\n    optimizer = 
tf.train.AdamOptimizer(learning_rate=params.learning_rate)\n    return multihead.create_estimator_spec(\n        features=features,\n        labels=labels,\n        mode=mode,\n        logits=logits,\n        optimizer=optimizer)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/run.deploy.sh",
    "content": "#!/bin/bash\n# Deploys a saved model on Cloud MLE.\n\nif [ \"$1\" == \"civil_comments\" ] || [ \"$1\" == \"toxicity\" ] || [ \"$1\" == \"many_communities\" ] ; then\n    \n    MODEL_NAME=tf_gru_attention_$1_glove\n\nelse\n    echo \"First positional arg must be one of civil_comments, toxicity, many_communities.\"\n    exit 1\nfi\n\n\n# By default, the model is the last one from the user.\nMODEL_SAVED_PATH=$(gcloud storage ls gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/ | tail -1)\n\n# Create a new model.\n# Will raise an error if the model already exists.\ngcloud ml-engine models create $MODEL_NAME \\\n  --regions us-central1\n\n# Deploy a model version.\nMODEL_VERSION=v_$(date +\"%Y%m%d_%H%M%S\")\ngcloud ml-engine versions create $MODEL_VERSION \\\n  --model $MODEL_NAME \\\n  --origin $MODEL_SAVED_PATH \\\n  --runtime-version 1.10\n"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/run.hyperparameter.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_gru_attention\"\nMODEL_NAME_DATA=\"${MODEL_NAME}_$1_glove\"\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.12 \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    --config=\"tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml\" \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt\" \\\n    --embedding_size=300 \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --is_embedding_trainable=False \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False \\\n    --text_feature=$text_feature\n\necho \"Model dir:\"\necho ${JOB_DIR}/model_dir\n"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/run.local.sh",
    "content": "#!/bin/bash\n\n# Note:\n# We currently use 2 different embeddings:\n# - glove.6B/glove.6B.300d.txt\n# - google-news/GoogleNews-vectors-negative300.txt\n# Glove assumes all words are lowercased, while Google-news handles different casing.\n# As there is currently no tf operation that perform lowercasing, we have the following \n# requirements:\n# - For google news: Run preprocess_in_tf=True (no lowercasing).\n# - For glove.6B, Run preprocess_in_tf=False (will force lowercasing).\n\nsource \"tf_trainer/common/dataset_config.sh\"\n\npython -m tf_trainer.tf_gru_attention.run \\\n  --train_path=$train_path \\\n  --validate_path=$valid_path \\\n  --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt\" \\\n  --model_dir=\"tf_gru_attention_local_model_dir\" \\\n  --labels=$labels \\\n  --label_dtypes=$label_dtypes \\\n  --preprocess_in_tf=False \\\n  --text_feature=$text_feature"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/run.ml_engine.sh",
    "content": "#!/bin/bash\n# This script runs one training job on Cloud MLE.\n\n# Note:\n# We currently use 2 different embeddings:\n# - glove.6B/glove.6B.300d.txt\n# - google-news/GoogleNews-vectors-negative300.txt\n# Glove assumes all words are lowercased, while Google-news handles different casing.\n# As there is currently no tf operation that perform lowercasing, we have the following \n# requirements:\n# - For google news: Run preprocess_in_tf=True (no lowercasing).\n# - For glove.6B, Run preprocess_in_tf=False (will force lowercasing).\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_gru_attention\"\nMODEL_NAME_DATA=${MODEL_NAME}_$1_glove\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\nif [ \"$1\" == \"civil_comments\" ]; then\n    batch_size=128\n    attention_units=32\n    dropout_rate=0.60960359286224075\n    learning_rate=0.0010256671195808884\n    dense_units='128'\n    gru_units='128,128'\n    train_steps=50000\n    eval_period=1000\n    eval_steps=2000\n    config=\"tf_trainer/common/basic_gpu_config.yaml\"\n    text_feature=\"comment_text\"\n\nelif [ \"$1\" == \"toxicity\" ]; then\n    batch_size=32\n    attention_units=32\n    dropout_rate=0.69999994803861521\n    learning_rate=0.00030340058446715442\n    dense_units='128'\n    gru_units='128,128'\n    train_steps=250000\n    eval_period=1000\n    eval_steps=6000\n    config=\"tf_trainer/common/basic_gpu_config.yaml\"\n    text_feature=\"comment_text\"\n\nelif [ \"$1\" == \"many_communities\" ]; then\n    batch_size=128\n    attention_units=32\n    dropout_rate=0.38471142580880757\n    learning_rate=0.000755324856537066\n    dense_units='128'\n    gru_units='128'\n    train_steps=700000\n    eval_period=4000\n    eval_steps=45000\n    config=\"tf_trainer/common/p100_config.yaml\"\n    text_feature=\"comment_text\"\n\nelif [ \"$1\" == \"many_communities_40_per_8_shot\" ]; then\n\n    train_steps=8000\n    
eval_steps=250\n    eval_period=200\n    config=\"tf_trainer/common/basic_gpu_config.yaml\"\n\n    if [ \"$2\" == \"optimistic\" ]; then\n\n        batch_size=64\n        attention_units=32\n        dropout_rate=0.69778643162683085\n        learning_rate=0.00080291321858594659\n        dense_units='128,128'\n        gru_units='128'\n\n    elif [ \"$2\" == \"pessimistic\" ]; then\n        \n        batch_size=32\n        attention_units=64\n        dropout_rate=0.052541994248873507\n        learning_rate=0.00049418814574477758\n        dense_units='128,128'\n        gru_units='128'\n\n    else\n        echo \"Must provide second positional argument.\"\n        exit 1\n    fi\n\nelse\n    echo \"First positional arg must be one of civil_comments, toxicity, many_communities.\"\n    return;\nfi\n\ngcloud ai-platform jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.10 \\\n    --config $config \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.100d.txt\" \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False \\\n    --batch_size=$batch_size \\\n    --attention_units=$attention_units \\\n    --dropout_rate=$dropout_rate \\\n    --learning_rate=$learning_rate \\\n    --dense_units=$dense_units \\\n    --gru_units=$gru_units \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --text_feature=$text_feature\n"
  },
  {
    "path": "experiments/tf_trainer/tf_gru_attention/run.py",
    "content": "\"\"\"Experiments with Toxicity Dataset\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport nltk\nimport tensorflow as tf\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import text_preprocessor\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.common import types\nfrom tf_trainer.tf_gru_attention import model as tf_gru_attention\n\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string(\"embeddings_path\",\n                           \"local_data/glove.6B/glove.6B.100d.txt\",\n                           \"Path to the embeddings file.\")\n\n\ndef main(argv):\n  del argv  # unused\n\n  embeddings_path = FLAGS.embeddings_path\n\n  preprocessor = text_preprocessor.TextPreprocessor(embeddings_path)\n\n  nltk.download(\"punkt\")\n  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)\n  dataset = tfrecord_input.TFRecordInputWithTokenizer(\n      train_preprocess_fn=train_preprocess_fn)\n\n  # TODO: Move embedding *into* Keras model.\n  model_tf = tf_gru_attention.TFRNNModel(dataset.labels())\n  model = preprocessor.add_embedding_to_model(model_tf,\n                                              base_model.TOKENS_FEATURE_KEY)\n\n  trainer = model_trainer.ModelTrainer(dataset, model)\n  trainer.train_with_eval()\n\n  serving_input_fn = serving_input.create_serving_input_fn(\n      word_to_idx=preprocessor._word_to_idx,\n      unknown_token=preprocessor._unknown_token,\n      text_feature_name=base_model.TOKENS_FEATURE_KEY,\n      example_key_name=base_model.EXAMPLE_KEY)\n  trainer.export(serving_input_fn, base_model.EXAMPLE_KEY,\n    metrics_key=\"auc/%s\" % FLAGS.labels.split(',')[0])\n\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/finetune.py",
    "content": "\"\"\"Experiments with many_communities dataset.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.tf_hub_classifier import model as tf_hub_classifier\n\nimport os\nimport pandas as pd\nimport tensorflow as tf\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string(\"embeddings_path\",\n                           \"local_data/glove.6B/glove.6B.100d.txt\",\n                           \"Path to the embeddings file.\")\n\ntf.app.flags.DEFINE_string(\"tmp_results_path\", None,\n                           \"Path to the local combined (across communities) results file.\")\n\ntf.app.flags.mark_flag_as_required(\"warm_start_from\")\ntf.app.flags.mark_flag_as_required(\"tmp_results_path\")\n\ndef main(argv):\n  del argv  # unused\n\n  dataset = tfrecord_input.TFRecordInput()\n  model = tf_hub_classifier.TFHubClassifierModel(dataset.labels())\n\n  trainer = model_trainer.ModelTrainer(dataset, model,\n    warm_start_from=FLAGS.warm_start_from)\n  trainer.train_with_eval()\n\n  keys = [(\"label\", \"probabilities\")]\n  predictions = list(trainer.predict_on_dev(predict_keys=keys))\n\n  valid_path_csv = FLAGS.validate_path.replace(\"..tfrecord\", \".csv\")\n  df = pd.read_csv(valid_path_csv)\n  labels = df[\"label\"].values\n  community = os.path.basename(FLAGS.validate_path).split(\"..\")[0]\n\n  assert len(labels) == len(predictions), \\\n    \"Labels and predictions must have the same length.\"\n\n  d = {\n    \"label\" : labels,\n    \"prediction\": [p[keys[0]][1] for p in predictions],\n    \"community\": [community for p in predictions],\n  }\n\n  df = pd.DataFrame(data=d)\n  df.to_csv(path_or_buf=FLAGS.tmp_results_path, mode='a+',\n    index=False, header=False)\n\nif __name__ == 
\"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/finetune.sh",
    "content": "#!/bin/bash\n\nBASE_PATH=\"gs://conversationai-models\"\nGCS_RESOURCES=\"${BASE_PATH}/resources\"\n\nwarm_start_from=\"gs://conversationai-models/tf_trainer_runs/msushkov/tf_hub_classifier_many_communities_40_per_8_shot/20190723_110557/model_dir\"\n\neval_steps=1\neval_period=5\n\nlabels=\"label\"\nlabel_dtypes=\"int\"\ntext_feature=\"text\"\n    \nbatch_size=24\ndropout_rate=0.53291173797826941\ndense_units='256,128,64'\n\nif [ \"$1\" == \"test\" ]; then\n\tVALIDATION_OR_TEST=\"test\"\n\n\t# Best hparams found on the validation set\n\tlearning_rate_lst=(0.00001238498)\n\ttrain_steps_lst=(50)\n\nelse\n\tVALIDATION_OR_TEST=\"validation\"\n\n\t# original, original/2, original/5, original/10, original*2\n\tlearning_rate_lst=(6.1924912697697353e-06 0.00000309624 0.00000123849 6.1924912697697353e-07 0.00001238498)\n\ttrain_steps_lst=(5 10 50)\nfi\n\ntrain_dir=\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/${VALIDATION_OR_TEST}_episodes/support/*.tfrecord\"\ncombined_results_dir=\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/$VALIDATION_OR_TEST\"\n\nfor learning_rate in \"${learning_rate_lst[@]}\"; do\n\techo \"Learning rate:\"\n\techo $learning_rate\n\n\tfor train_steps in \"${train_steps_lst[@]}\"; do\n\t\techo \"Train steps:\"\n\t\techo $train_steps\n\n\t\ttmp_results_fname=\"tf_hub_classifier_finetuning_baseline_trainsteps_${train_steps}_lrate_${learning_rate}_msushkov.csv\"\n\t\ttmp_results_path=\"/tmp/$tmp_results_fname\"\n\n\t\trm $tmp_results_path\n\n\t\tCOUNTER=0\n\t\tfor train_path in `gcloud storage ls $train_dir`; do\n\t\t\t\n\t\t\tvalid_path=${train_path/${VALIDATION_OR_TEST}_episodes\\/support/${VALIDATION_OR_TEST}_episodes\\/query}\n\n\t\t\trm -rf \"tf_hub_classifier_local_model_dir\"\n\n\t\t\tpython -m tf_trainer.tf_hub_classifier.finetune \\\n\t\t\t    --model_dir=\"tf_hub_classifier_local_model_dir\" \\\n\t\t\t    
--train_path=$train_path \\\n\t\t\t    --validate_path=$valid_path \\\n\t\t\t    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt\" \\\n\t\t\t    --is_embedding_trainable=False \\\n\t\t\t    --train_steps=$train_steps \\\n\t\t\t    --eval_period=$eval_period \\\n\t\t\t    --eval_steps=$eval_steps \\\n\t\t\t    --labels=$labels \\\n\t\t\t    --label_dtypes=$label_dtypes \\\n\t\t\t    --preprocess_in_tf=False \\\n\t\t\t    --batch_size=$batch_size \\\n\t\t\t    --dense_units=$dense_units \\\n\t\t\t    --dropout_rate=$dropout_rate \\\n\t\t\t    --learning_rate=$learning_rate \\\n\t\t\t    --text_feature=$text_feature \\\n\t\t\t    --warm_start_from=$warm_start_from \\\n\t\t\t    --tmp_results_path=$tmp_results_path\n\n\t\t\tCOUNTER=$[$COUNTER +1]\n\t\tdone\n\n\t\tgcloud storage cp $tmp_results_path $combined_results_dir\n\n\tdone\ndone"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/hparam_config.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: CUSTOM\n  masterType: standard\n  workerType: standard_gpu\n  parameterServerType: large_model\n  workerCount: 1\n  parameterServerCount: 1\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/frac_neg\n    maxTrials: 40\n    maxParallelTrials: 4\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 1\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: INTEGER\n        minValue: 16\n        maxValue: 256\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '512,128,64'\n        - '128,64,64'\n        - '128,64'\n        - '512,64'\n        - '128,128,128,64'\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/hparam_config_civil_comments.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/toxicity\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '512,128,64'\n        - '128,64,64'\n        - '128,64'\n        - '512,64'\n        - '128,128,128,64'\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/hparam_config_many_communities.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/removed\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '512,128,64'\n        - '128,64,64'\n        - '128,64'\n        - '512,64'\n        - '128,128,128,64'\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/hparam_config_many_communities_40_per_8_shot.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/label\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 32\n        - 64\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '256,128,64'\n        - '128,64,64'\n        - '128,64'\n        - '512,64'\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/hparam_config_toxicity.yaml",
    "content": "trainingInput:\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/frac_neg\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: FALSE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 0.7\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '512,128,64'\n        - '128,64,64'\n        - '128,64'\n        - '512,64'\n        - '128,128,128,64'\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/model.py",
    "content": "\"\"\"Tensorflow Estimator using TF Hub universal sentence encoder.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nimport tensorflow_hub as hub\nfrom tf_trainer.common import base_model\nfrom typing import List\n\nFLAGS = tf.app.flags.FLAGS\n\n# Hyperparameters\n# TODO: Add validation\ntf.app.flags.DEFINE_float('learning_rate', 0.00003,\n                          'The learning rate to use during training.')\ntf.app.flags.DEFINE_float('dropout_rate', 0.15,\n                          'The dropout rate to use during training.')\ntf.app.flags.DEFINE_string(\n    'model_spec',\n    'https://tfhub.dev/google/universal-sentence-encoder-large/3',\n    'The url of the TF Hub sentence encoding module to use.')\ntf.app.flags.DEFINE_bool('trainable', True,\n                         'What to pass for the TF Hub trainable parameter.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning. 
The length of the list\n# determines the number of layers, and the size of each layer.\ntf.app.flags.DEFINE_string(\n    'dense_units', '1024,1024,512',\n    'Comma delimited string for the number of hidden units in the dense layers.'\n)\n\n\nclass TFHubClassifierModel(base_model.BaseModel):\n\n  def __init__(self, target_labels: List[str]) -> None:\n    self._target_labels = target_labels\n\n  @staticmethod\n  def hparams():\n    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]\n    hparams = tf.contrib.training.HParams(\n        learning_rate=FLAGS.learning_rate,\n        dropout_rate=FLAGS.dropout_rate,\n        dense_units=dense_units)\n    return hparams\n\n  def estimator(self, model_dir):\n    estimator = tf.estimator.Estimator(\n        model_fn=self._model_fn,\n        params=self.hparams(),\n        config=tf.estimator.RunConfig(model_dir=model_dir))\n    return estimator\n\n  def _model_fn(self, features, labels, mode, params, config):\n    embedded_text_feature_column = hub.text_embedding_column(\n        key=base_model.TEXT_FEATURE_KEY,\n        module_spec=FLAGS.model_spec,\n        trainable=FLAGS.trainable)\n    inputs = tf.feature_column.input_layer(features,\n                                           [embedded_text_feature_column])\n\n    batch_size = tf.shape(inputs)[0]\n\n    logits = inputs\n    for num_units in params.dense_units:\n      logits = tf.layers.dense(\n          inputs=logits, units=num_units, activation=tf.nn.relu)\n      logits = tf.layers.dropout(logits, rate=params.dropout_rate)\n    logits = tf.layers.dense(\n        inputs=logits, units=len(self._target_labels), activation=None)\n\n    output_heads = [\n        tf.contrib.estimator.binary_classification_head(\n            name=name, weight_column=name + '_weight')\n        for name in self._target_labels\n    ]\n    multihead = tf.contrib.estimator.multi_head(output_heads)\n\n    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)\n    
return multihead.create_estimator_spec(\n        features=features,\n        labels=labels,\n        mode=mode,\n        logits=logits,\n        optimizer=optimizer)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/run.deploy.sh",
    "content": "#!/bin/bash\n# Deploys a saved model on Cloud MLE.\n\nif [ \"$1\" == \"civil_comments\" ] || [ \"$1\" == \"toxicity\" ] || [ \"$1\" == \"many_communities\" ] ; then\n    \n    MODEL_NAME=tf_hub_classifier_$1\n\nelse\n    echo \"First positional arg must be one of civil_comments, toxicity, many_communities.\"\n    exit 1\nfi\n\n\n# By default, the model is the last one from the user.\nMODEL_SAVED_PATH=$(gcloud storage ls gs://conversationai-models/tf_trainer_runs/${USER}/${MODEL_NAME}/ | tail -1)\n\n# Create a new model.\n# Will raise an error if the model already exists.\ngcloud ml-engine models create $MODEL_NAME \\\n  --regions us-central1\n\n# Deploy a model version.\nMODEL_VERSION=v_$(date +\"%Y%m%d_%H%M%S\")\ngcloud ml-engine versions create $MODEL_VERSION \\\n  --model $MODEL_NAME \\\n  --origin $MODEL_SAVED_PATH \\\n  --runtime-version 1.10\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/run.hyperparameter.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_hub_classifier\"\nMODEL_NAME_DATA=\"${MODEL_NAME}_$1\"\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.12 \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    --config=\"tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml\" \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --is_embedding_trainable=False \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False \\\n    --model_spec=\"gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73\" \\\n    --text_feature=$text_feature\n\n\necho \"Model dir:\"\necho ${JOB_DIR}/model_dir\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/run.local.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\n\npython -m tf_trainer.tf_hub_classifier.run \\\n  --train_path=$train_path \\\n  --validate_path=$valid_path \\\n  --model_dir=\"tf_hub_classifier_local_model_dir\" \\\n  --model_spec=\"gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73\" \\\n  --labels=$labels \\\n  --label_dtypes=$label_dtypes\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/run.ml_engine.sh",
    "content": "#!/bin/bash\n# This script runs one training job on Cloud MLE.\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_hub_classifier\"\nMODEL_NAME_DATA=\"${MODEL_NAME}_$1\"\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\n\nif [ \"$1\" == \"civil_comments\" ]; then\n    batch_size=128\n    dropout_rate=0.12298246947263007\n    learning_rate=0.0001473127671008433\n    dense_units='512,128,64'\n    train_steps=50000\n    eval_period=1000\n    eval_steps=2000\n    config=\"tf_trainer/common/p100_config.yaml\"\n\nelif [ \"$1\" == \"toxicity\" ]; then\n    batch_size=32\n    dropout_rate=0.38925458520872092\n    learning_rate=0.00012916208894260696\n    dense_units='512,128,64'    \n    train_steps=250000\n    eval_period=1000\n    eval_steps=6000\n    config=\"tf_trainer/common/p100_config.yaml\"\n\nelif [ \"$1\" == \"many_communities\" ]; then\n    batch_size=128\n    dropout_rate=0.6987085501984901\n    learning_rate=0.00031738926545884962\n    dense_units='512,128,64'    \n    train_steps=700000\n    eval_period=4000\n    eval_steps=45000\n    config=\"tf_trainer/common/basic_gpu_config.yaml\"\n\nelif [ \"$1\" == \"many_communities_40_per_8_shot\" ]; then\n\n    train_steps=8000\n    eval_steps=250\n    eval_period=200\n    config=\"tf_trainer/common/basic_gpu_config.yaml\"\n\n    if [ \"$2\" == \"optimistic\" ]; then\n\n        batch_size=32\n        dropout_rate=0.69999979814967772\n        learning_rate=7.2549254796945835e-06\n        dense_units='512,64'\n\n    elif [ \"$2\" == \"pessimistic\" ]; then\n        \n        batch_size=32\n        dropout_rate=0.53291173797826941\n        learning_rate=6.1924912697697353e-06\n        dense_units='256,128,64'\n\n    else\n        echo \"Must provide second positional argument.\"\n        exit 1\n    fi\n\nelse\n    echo \"First positional arg must be one of civil_comments, toxicity, many_communities.\"\n    return;\nfi\n\n\ngcloud 
ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.10 \\\n    --config $config \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --batch_size=$batch_size \\\n    --dropout_rate=$dropout_rate \\\n    --learning_rate=$learning_rate \\\n    --dense_units=$dense_units \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --model_spec=\"gs://conversationai-models/resources/tfhub/universal-sentence-encoder-large-3/96e8f1d3d4d90ce86b2db128249eb8143a91db73\" \\\n    --text_feature=$text_feature\n\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_classifier/run.py",
    "content": "\"\"\"Experiments with Toxicity Dataset\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.tf_hub_classifier import model as tf_hub_classifier\n\nimport tensorflow as tf\n\nFLAGS = tf.app.flags.FLAGS\n\n\ndef main(argv):\n  del argv  # unused\n\n  dataset = tfrecord_input.TFRecordInput()\n  model = tf_hub_classifier.TFHubClassifierModel(dataset.labels())\n\n  trainer = model_trainer.ModelTrainer(dataset, model)\n  trainer.train_with_eval()\n\n  serving_input_fn = serving_input.create_text_serving_input_fn(\n      text_feature_name=base_model.TEXT_FEATURE_KEY,\n      example_key_name=base_model.EXAMPLE_KEY)\n  trainer.export(serving_input_fn, base_model.EXAMPLE_KEY,\n    metrics_key=\"auc/%s\" % FLAGS.labels.split(',')[0])\n\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_tfjs/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/tf_hub_tfjs/model.py",
    "content": "\"\"\"Tensorflow Estimator using TF Hub universal sentence encoder.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nimport tensorflow_hub as hub\nfrom tf_trainer.common import base_model\nfrom typing import List\n\nFLAGS = tf.app.flags.FLAGS\n\n# Hyperparameters\n# TODO: Add validation\ntf.app.flags.DEFINE_float('learning_rate', 0.00005,\n                          'The learning rate to use during training.')\ntf.app.flags.DEFINE_float('dropout_rate', 0.38925,\n                          'The dropout rate to use during training.')\ntf.app.flags.DEFINE_string(\n    'model_spec',\n    'https://tfhub.dev/google/universal-sentence-encoder-lite/2',\n    'The url of the TF Hub sentence encoding module to use.')\ntf.app.flags.DEFINE_bool('trainable', True,\n                         'What to pass for the TF Hub trainable parameter.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning. 
The length of the list\n# determines the number of layers, and the size of each layer.\ntf.app.flags.DEFINE_string(\n    'dense_units', '512,128,64',\n    'Comma delimited string for the number of hidden units in the dense layers.'\n)\n\n\nclass TFHubClassifierModel(base_model.BaseModel):\n\n  def __init__(self, target_labels: List[str]) -> None:\n    self._target_labels = target_labels\n\n  @staticmethod\n  def hparams():\n    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]\n    hparams = tf.contrib.training.HParams(\n        learning_rate=FLAGS.learning_rate,\n        dropout_rate=FLAGS.dropout_rate,\n        dense_units=dense_units)\n    return hparams\n\n  def estimator(self, model_dir):\n    estimator = tf.estimator.Estimator(\n        model_fn=self._model_fn,\n        params=self.hparams(),\n        config=tf.estimator.RunConfig(model_dir=model_dir))\n    return estimator\n\n  def _model_fn(self, features, labels, mode, params, config):\n    module = hub.Module(FLAGS.model_spec, trainable=True)\n    logits = module(\n      inputs=dict(\n      values=features['values'],\n      indices=features['indices'],\n      dense_shape=features['dense_shape']))\n    for num_units in params.dense_units:\n      logits = tf.layers.dense(\n          inputs=logits, units=num_units, activation=tf.nn.relu)\n      logits = tf.layers.dropout(logits, rate=params.dropout_rate)\n    logits = tf.layers.dense(\n        inputs=logits, units=len(self._target_labels), activation=None)\n\n    output_heads = [\n        tf.contrib.estimator.binary_classification_head(\n            name=name, weight_column=name + '_weight')\n        for name in self._target_labels\n    ]\n    multihead = tf.contrib.estimator.multi_head(output_heads)\n\n    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)\n    return multihead.create_estimator_spec(\n        features=features,\n        labels=labels,\n        mode=mode,\n        logits=logits,\n        
optimizer=optimizer)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_tfjs/notebook/BiasEvaluation.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"BiasEvaluation.ipynb\",\n      \"version\": \"0.3.2\",\n      \"provenance\": [],\n      \"collapsed_sections\": []\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"metadata\": {\n        \"id\": \"9B7PdsrvW__k\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"# Bias Evaluation for TF Javascript Model\\n\",\n        \"\\n\",\n        \"Based on the [FAT* Tutorial Measuring Unintended Bias in Text Classification Models with Real Data](https://github.com/conversationai/unintended-ml-bias-analysis/blob/master/presentations/FAT_star_tutorial.md).\\n\",\n        \"\\n\",\n        \"Copyright 2019 Google LLC.\\n\",\n        \"SPDX-License-Identifier: Apache-2.0\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"0Jsjp3E5rbuC\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"e6aeceef-b28b-4c9d-aec9-c870def2219f\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 35\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!pip3 install --quiet \\\"tensorflow>=1.11\\\"\\n\",\n        \"!pip3 install --quiet sentencepiece\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\u001b[?25l\\r\\u001b[K    0% |▎                               | 10kB 16.3MB/s eta 0:00:01\\r\\u001b[K    1% |▋                               | 20kB 2.2MB/s eta 0:00:01\\r\\u001b[K    2% |█                               | 30kB 3.3MB/s eta 0:00:01\\r\\u001b[K    3% |█▎                              | 40kB 2.2MB/s eta 0:00:01\\r\\u001b[K    4% |█▋                              | 51kB 2.7MB/s eta 
0:00:01\\r\\u001b[K    5% |██                              | 61kB 3.2MB/s eta 0:00:01\\r\\u001b[K    6% |██▏                             | 71kB 3.7MB/s eta 0:00:01\\r\\u001b[K    7% |██▌                             | 81kB 4.1MB/s eta 0:00:01\\r\\u001b[K    8% |██▉                             | 92kB 4.6MB/s eta 0:00:01\\r\\u001b[K    9% |███▏                            | 102kB 3.5MB/s eta 0:00:01\\r\\u001b[K    10% |███▌                            | 112kB 3.6MB/s eta 0:00:01\\r\\u001b[K    11% |███▉                            | 122kB 5.0MB/s eta 0:00:01\\r\\u001b[K    12% |████                            | 133kB 5.0MB/s eta 0:00:01\\r\\u001b[K    13% |████▍                           | 143kB 9.3MB/s eta 0:00:01\\r\\u001b[K    14% |████▊                           | 153kB 9.5MB/s eta 0:00:01\\r\\u001b[K    15% |█████                           | 163kB 9.5MB/s eta 0:00:01\\r\\u001b[K    16% |█████▍                          | 174kB 9.3MB/s eta 0:00:01\\r\\u001b[K    17% |█████▊                          | 184kB 9.4MB/s eta 0:00:01\\r\\u001b[K    18% |██████                          | 194kB 9.4MB/s eta 0:00:01\\r\\u001b[K    19% |██████▎                         | 204kB 40.6MB/s eta 0:00:01\\r\\u001b[K    20% |██████▋                         | 215kB 10.4MB/s eta 0:00:01\\r\\u001b[K    21% |███████                         | 225kB 10.5MB/s eta 0:00:01\\r\\u001b[K    22% |███████▎                        | 235kB 10.2MB/s eta 0:00:01\\r\\u001b[K    23% |███████▋                        | 245kB 10.1MB/s eta 0:00:01\\r\\u001b[K    24% |███████▉                        | 256kB 10.1MB/s eta 0:00:01\\r\\u001b[K    25% |████████▏                       | 266kB 9.9MB/s eta 0:00:01\\r\\u001b[K    26% |████████▌                       | 276kB 10.2MB/s eta 0:00:01\\r\\u001b[K    27% |████████▉                       | 286kB 10.2MB/s eta 0:00:01\\r\\u001b[K    28% |█████████▏                      | 296kB 10.2MB/s eta 0:00:01\\r\\u001b[K    29% |█████████▌                      | 307kB 10.4MB/s 
eta 0:00:01\\r\\u001b[K    30% |█████████▊                      | 317kB 42.5MB/s eta 0:00:01\\r\\u001b[K    31% |██████████                      | 327kB 42.5MB/s eta 0:00:01\\r\\u001b[K    32% |██████████▍                     | 337kB 49.1MB/s eta 0:00:01\\r\\u001b[K    33% |██████████▊                     | 348kB 45.7MB/s eta 0:00:01\\r\\u001b[K    34% |███████████                     | 358kB 44.8MB/s eta 0:00:01\\r\\u001b[K    35% |███████████▍                    | 368kB 49.3MB/s eta 0:00:01\\r\\u001b[K    36% |███████████▋                    | 378kB 47.6MB/s eta 0:00:01\\r\\u001b[K    37% |████████████                    | 389kB 47.8MB/s eta 0:00:01\\r\\u001b[K    38% |████████████▎                   | 399kB 12.3MB/s eta 0:00:01\\r\\u001b[K    39% |████████████▋                   | 409kB 12.3MB/s eta 0:00:01\\r\\u001b[K    40% |█████████████                   | 419kB 12.3MB/s eta 0:00:01\\r\\u001b[K    41% |█████████████▎                  | 430kB 12.2MB/s eta 0:00:01\\r\\u001b[K    42% |█████████████▌                  | 440kB 12.1MB/s eta 0:00:01\\r\\u001b[K    43% |█████████████▉                  | 450kB 12.2MB/s eta 0:00:01\\r\\u001b[K    44% |██████████████▏                 | 460kB 12.2MB/s eta 0:00:01\\r\\u001b[K    45% |██████████████▌                 | 471kB 12.2MB/s eta 0:00:01\\r\\u001b[K    46% |██████████████▉                 | 481kB 12.3MB/s eta 0:00:01\\r\\u001b[K    47% |███████████████▏                | 491kB 12.2MB/s eta 0:00:01\\r\\u001b[K    48% |███████████████▍                | 501kB 47.1MB/s eta 0:00:01\\r\\u001b[K    49% |███████████████▊                | 512kB 44.5MB/s eta 0:00:01\\r\\u001b[K    50% |████████████████                | 522kB 45.4MB/s eta 0:00:01\\r\\u001b[K    51% |████████████████▍               | 532kB 47.8MB/s eta 0:00:01\\r\\u001b[K    52% |████████████████▊               | 542kB 49.3MB/s eta 0:00:01\\r\\u001b[K    53% |█████████████████               | 552kB 53.0MB/s eta 0:00:01\\r\\u001b[K    54% |█████████████████▎      
        | 563kB 53.3MB/s eta 0:00:01\\r\\u001b[K    55% |█████████████████▋              | 573kB 51.8MB/s eta 0:00:01\\r\\u001b[K    56% |██████████████████              | 583kB 51.9MB/s eta 0:00:01\\r\\u001b[K    57% |██████████████████▎             | 593kB 53.4MB/s eta 0:00:01\\r\\u001b[K    58% |██████████████████▋             | 604kB 53.2MB/s eta 0:00:01\\r\\u001b[K    59% |███████████████████             | 614kB 57.7MB/s eta 0:00:01\\r\\u001b[K    60% |███████████████████▏            | 624kB 55.6MB/s eta 0:00:01\\r\\u001b[K    61% |███████████████████▌            | 634kB 54.8MB/s eta 0:00:01\\r\\u001b[K    62% |███████████████████▉            | 645kB 53.2MB/s eta 0:00:01\\r\\u001b[K    63% |████████████████████▏           | 655kB 52.5MB/s eta 0:00:01\\r\\u001b[K    64% |████████████████████▌           | 665kB 44.3MB/s eta 0:00:01\\r\\u001b[K    64% |████████████████████▉           | 675kB 45.4MB/s eta 0:00:01\\r\\u001b[K    65% |█████████████████████▏          | 686kB 45.2MB/s eta 0:00:01\\r\\u001b[K    66% |█████████████████████▍          | 696kB 45.8MB/s eta 0:00:01\\r\\u001b[K    67% |█████████████████████▊          | 706kB 45.4MB/s eta 0:00:01\\r\\u001b[K    68% |██████████████████████          | 716kB 45.6MB/s eta 0:00:01\\r\\u001b[K    69% |██████████████████████▍         | 727kB 45.7MB/s eta 0:00:01\\r\\u001b[K    70% |██████████████████████▊         | 737kB 45.4MB/s eta 0:00:01\\r\\u001b[K    71% |███████████████████████         | 747kB 47.7MB/s eta 0:00:01\\r\\u001b[K    72% |███████████████████████▎        | 757kB 47.9MB/s eta 0:00:01\\r\\u001b[K    73% |███████████████████████▋        | 768kB 58.4MB/s eta 0:00:01\\r\\u001b[K    74% |████████████████████████        | 778kB 55.5MB/s eta 0:00:01\\r\\u001b[K    75% |████████████████████████▎       | 788kB 55.7MB/s eta 0:00:01\\r\\u001b[K    76% |████████████████████████▋       | 798kB 53.9MB/s eta 0:00:01\\r\\u001b[K    77% |█████████████████████████       | 808kB 54.4MB/s eta 0:00:01\\r\\u001b[K    78% 
|█████████████████████████▏      | 819kB 28.4MB/s eta 0:00:01\\r\\u001b[K    79% |█████████████████████████▌      | 829kB 28.7MB/s eta 0:00:01\\r\\u001b[K    80% |█████████████████████████▉      | 839kB 28.7MB/s eta 0:00:01\\r\\u001b[K    81% |██████████████████████████▏     | 849kB 28.3MB/s eta 0:00:01\\r\\u001b[K    82% |██████████████████████████▌     | 860kB 26.5MB/s eta 0:00:01\\r\\u001b[K    83% |██████████████████████████▉     | 870kB 26.4MB/s eta 0:00:01\\r\\u001b[K    84% |███████████████████████████     | 880kB 26.9MB/s eta 0:00:01\\r\\u001b[K    85% |███████████████████████████▍    | 890kB 27.1MB/s eta 0:00:01\\r\\u001b[K    86% |███████████████████████████▊    | 901kB 27.6MB/s eta 0:00:01\\r\\u001b[K    87% |████████████████████████████    | 911kB 27.2MB/s eta 0:00:01\\r\\u001b[K    88% |████████████████████████████▍   | 921kB 49.5MB/s eta 0:00:01\\r\\u001b[K    89% |████████████████████████████▊   | 931kB 48.4MB/s eta 0:00:01\\r\\u001b[K    90% |█████████████████████████████   | 942kB 48.4MB/s eta 0:00:01\\r\\u001b[K    91% |█████████████████████████████▎  | 952kB 48.8MB/s eta 0:00:01\\r\\u001b[K    92% |█████████████████████████████▋  | 962kB 53.0MB/s eta 0:00:01\\r\\u001b[K    93% |██████████████████████████████  | 972kB 53.4MB/s eta 0:00:01\\r\\u001b[K    94% |██████████████████████████████▎ | 983kB 53.5MB/s eta 0:00:01\\r\\u001b[K    95% |██████████████████████████████▋ | 993kB 52.7MB/s eta 0:00:01\\r\\u001b[K    96% |██████████████████████████████▉ | 1.0MB 53.2MB/s eta 0:00:01\\r\\u001b[K    97% |███████████████████████████████▏| 1.0MB 54.1MB/s eta 0:00:01\\r\\u001b[K    98% |███████████████████████████████▌| 1.0MB 53.1MB/s eta 0:00:01\\r\\u001b[K    99% |███████████████████████████████▉| 1.0MB 54.6MB/s eta 0:00:01\\r\\u001b[K    100% |████████████████████████████████| 1.0MB 17.6MB/s \\n\",\n            \"\\u001b[?25h\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": 
\"4bSQf93oVo7j\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"191c3e9f-d902-4071-e115-720d8d2ed1a5\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 53\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"from __future__ import absolute_import\\n\",\n        \"from __future__ import division\\n\",\n        \"from __future__ import print_function\\n\",\n        \"\\n\",\n        \"import re\\n\",\n        \"import pandas as pd\\n\",\n        \"import numpy as np\\n\",\n        \"import matplotlib.pyplot as plt\\n\",\n        \"import seaborn as sns\\n\",\n        \"import tensorflow as tf\\n\",\n        \"import sentencepiece\\n\",\n        \"from google.colab import auth\\n\",\n        \"from IPython.display import HTML, display\\n\",\n        \"\\n\",\n        \"from sklearn import metrics\\n\",\n        \"\\n\",\n        \"%matplotlib inline\\n\",\n        \"\\n\",\n        \"# autoreload makes it easier to interactively work on code in imported libraries\\n\",\n        \"%load_ext autoreload\\n\",\n        \"%autoreload 2\\n\",\n        \"\\n\",\n        \"# Set pandas display options so we can read more of the comment text.\\n\",\n        \"pd.set_option('max_colwidth', 300)\\n\",\n        \"\\n\",\n        \"# Seed for Pandas sampling, to get consistent sampling results\\n\",\n        \"RANDOM_STATE = 123456789\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"The autoreload extension is already loaded. 
To reload it, use:\\n\",\n            \"  %reload_ext autoreload\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"12LU1AjWr-da\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"auth.authenticate_user()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"FFFXbLiRrvtz\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"69de8876-a0d8-4e31-816c-a3c135854faa\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 125\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!mkdir -p tfjs_model\\n\",\n        \"!gcloud storage cp --recursive gs://conversationai-public/public_models/tfjs/v1/* tfjs_model\"      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Copying gs://conversationai-public/public_models/tfjs/v1/saved_model.pb...\\n\",\n            \"Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.data-00000-of-00001...\\n\",\n            \"/ [0 files][    0.0 B/  3.9 MiB]                                                \\rCopying gs://conversationai-public/public_models/tfjs/v1/variables/variables.index...\\n\",\n            \"Copying gs://conversationai-public/public_models/tfjs/v1/assets/universal_encoder_8k_spm.model...\\n\",\n            \"- [4/4 files][ 32.3 MiB/ 32.3 MiB] 100% Done                                    \\n\",\n            \"Operation completed over 4 objects/32.3 MiB.                                     
\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"0bmiyJR60gDP\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"ded1805f-f50c-4846-cafb-dfb51d79fa4d\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 35\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"test_df = pd.read_csv(\\n\",\n        \"    'https://raw.githubusercontent.com/conversationai/unintended-ml-bias-analysis/master/unintended_ml_bias/new_madlibber/output_data/English/intersectional_madlibs.csv')\\n\",\n        \"print('test data has %d rows' % len(test_df))\\n\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"test data has 30240 rows\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"lbF4Fy-yjnaH\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"madlibs_words = pd.read_csv(\\n\",\n        \"    'https://raw.githubusercontent.com/conversationai/unintended-ml-bias-analysis/master/unintended_ml_bias/new_madlibber/input_data/English/words.csv')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"rwx0ucIXj4Ba\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"identity_columns = madlibs_words[madlibs_words.type=='identity'].word.tolist()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"mzY7oTzQlHq5\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"for term in 
identity_columns:\\n\",\n        \"  test_df[term] = test_df['phrase'].apply(\\n\",\n        \"      lambda x: bool(re.search(r'\\\\b{}\\\\b'.format(term), x,\\n\",\n        \"                               flags=re.UNICODE|re.IGNORECASE)))\\n\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"6dP7ANLcl1NC\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"_8RfGq2lX2EY\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"## Score test set with our text classification model\\n\",\n        \"\\n\",\n        \"Using our new model, we can score the set of test comments for toxicity.\\n\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"AfC_yo0Tt5SQ\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"TOXICITY_COLUMN = 'toxicity'\\n\",\n        \"TEXT_COLUMN = 'phrase'\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"E0KT0565tUDp\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"7bbd2622-ea7e-43dd-a6b9-86d1e033508a\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 289\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"predict_fn = tf.contrib.predictor.from_saved_model(\\n\",\n        \"  'tfjs_model', signature_def_key='predict')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\",\n            \"WARNING: The TensorFlow contrib module will not be included in 
TensorFlow 2.0.\\n\",\n            \"For more information, please see:\\n\",\n            \"  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\\n\",\n            \"  * https://github.com/tensorflow/addons\\n\",\n            \"If you depend on functionality not listed there, please file an issue.\\n\",\n            \"\\n\",\n            \"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\\n\",\n            \"Instructions for updating:\\n\",\n            \"This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.\\n\",\n            \"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\\n\",\n            \"Instructions for updating:\\n\",\n            \"Use standard file APIs to check for files with this prefix.\\n\",\n            \"INFO:tensorflow:Restoring parameters from tfjs_model/variables/variables\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"ZppO68XctZPH\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"630cbb60-9f58-4d28-a5da-45b4091f6715\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 35\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"sp = sentencepiece.SentencePieceProcessor()\\n\",\n        \"sp.Load('tfjs_model/assets/universal_encoder_8k_spm.model')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n     
   {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"True\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 17\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"Q3heBWS5tdg9\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"def progress(value, max=100):\\n\",\n        \"    return HTML(\\\"\\\"\\\"\\n\",\n        \"        <progress\\n\",\n        \"            value='{value}'\\n\",\n        \"            max='{max}',\\n\",\n        \"            style='width: 100%'\\n\",\n        \"        >\\n\",\n        \"            {value}\\n\",\n        \"        </progress>\\n\",\n        \"    \\\"\\\"\\\".format(value=value, max=max))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"KSG_Dc7Gti-w\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"86ab2dd0-cd14-48f4-f42a-7a7216de26ec\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"tox_scores = []\\n\",\n        \"nrows = test_df.shape[0]\\n\",\n        \"out = display(progress(0, nrows), display_id=True)\\n\",\n        \"for offset in range(0, nrows):\\n\",\n        \"  out.update(progress(offset, nrows))\\n\",\n        \"  values = sp.EncodeAsIds(test_df[TEXT_COLUMN][offset])\\n\",\n        \"  tox_scores.append(predict_fn({\\n\",\n        \"      'values': values,\\n\",\n        \"      'indices': [(0, i) for i in range(len(values))],\\n\",\n        \"      'dense_shape': [1, len(values)]})['toxicity/probabilities'][0,1])\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": 
\"display_data\",\n          \"data\": {\n            \"text/html\": [\n              \"\\n\",\n              \"        <progress\\n\",\n              \"            value='30239'\\n\",\n              \"            max='30240',\\n\",\n              \"            style='width: 100%'\\n\",\n              \"        >\\n\",\n              \"            30239\\n\",\n              \"        </progress>\\n\",\n              \"    \"\n            ],\n            \"text/plain\": [\n              \"<IPython.core.display.HTML object>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"V0YjAtUBWa1p\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"MODEL_NAME = 'tfjs_model'\\n\",\n        \"test_df[MODEL_NAME] = tox_scores\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"P31u4dyyaKKE\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"# Evaluate the overall ROC-AUC\\n\",\n        \"\\n\",\n        \"This calculates the model's performance on the entire test set using the ROC-AUC metric.\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"o6IClt8eplMn\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"SUBGROUP_AUC = 'subgroup_auc'\\n\",\n        \"BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC = 'background_positive_subgroup_negative_auc'\\n\",\n        \"BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC = 'background_negative_subgroup_positive_auc'\\n\",\n        \"\\n\",\n        \"def compute_auc(y_true, y_pred):\\n\",\n        \"  try:\\n\",\n        \"    return metrics.roc_auc_score(y_true, y_pred)\\n\",\n        \"  except ValueError:\\n\",\n        
\"    return np.nan\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def compute_subgroup_auc(df, subgroup, label, model_name):\\n\",\n        \"  subgroup_examples = df[df[subgroup]]\\n\",\n        \"  return compute_auc(subgroup_examples[label], subgroup_examples[model_name])\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def compute_background_positive_subgroup_negative_auc(df, subgroup, label, model_name):\\n\",\n        \"  \\\"\\\"\\\"Computes the AUC of the within-subgroup negative examples and the background positive examples.\\\"\\\"\\\"\\n\",\n        \"  index = df[label] == 'toxic'\\n\",\n        \"  subgroup_negative_examples = df[df[subgroup] & ~index]\\n\",\n        \"  non_subgroup_positive_examples = df[~df[subgroup] & index]\\n\",\n        \"  examples = subgroup_negative_examples.append(non_subgroup_positive_examples)\\n\",\n        \"  return compute_auc(examples[label], examples[model_name])\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def compute_background_negative_subgroup_positive_auc(df, subgroup, label, model_name):\\n\",\n        \"  \\\"\\\"\\\"Computes the AUC of the within-subgroup positive examples and the background negative examples.\\\"\\\"\\\"\\n\",\n        \"  index = df[label] == 'toxic'\\n\",\n        \"  subgroup_positive_examples = df[df[subgroup] & index]\\n\",\n        \"  non_subgroup_negative_examples = df[~df[subgroup] & ~index]\\n\",\n        \"  examples = subgroup_positive_examples.append(non_subgroup_negative_examples)\\n\",\n        \"  return compute_auc(examples[label], examples[model_name])\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def compute_bias_metrics_for_model(dataset,\\n\",\n        \"                                   subgroups,\\n\",\n        \"                                   model,\\n\",\n        \"                                   label_col,\\n\",\n        \"                                   include_asegs=False):\\n\",\n        \"  \\\"\\\"\\\"Computes per-subgroup 
metrics for all subgroups and one model.\\\"\\\"\\\"\\n\",\n        \"  records = []\\n\",\n        \"  for subgroup in subgroups:\\n\",\n        \"    record = {\\n\",\n        \"        'subgroup': subgroup,\\n\",\n        \"        'subgroup_size': len(dataset[dataset[subgroup]])\\n\",\n        \"    }\\n\",\n        \"    record[SUBGROUP_AUC] = compute_subgroup_auc(\\n\",\n        \"        dataset, subgroup, label_col, model)\\n\",\n        \"    record[BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC] = compute_background_positive_subgroup_negative_auc(\\n\",\n        \"        dataset, subgroup, label_col, model)\\n\",\n        \"    record[BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC] = compute_background_negative_subgroup_positive_auc(\\n\",\n        \"        dataset, subgroup, label_col, model)\\n\",\n        \"    records.append(record)\\n\",\n        \"  return pd.DataFrame(records).sort_values('subgroup_auc', ascending=True)\\n\",\n        \"\\n\",\n        \"bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns, MODEL_NAME, TOXICITY_COLUMN)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"GS9t687KogDQ\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"# Plot a heatmap of bias metrics\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"B5OxkxMqNvaB\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Plot a heatmap of the bias metrics.  
Higher scores indicate better results.\\n\",\n        \"* Subgroup AUC measures the ability to separate toxic and non-toxic comments for this identity.\\n\",\n        \"* Negative cross AUC measures the ability to separate non-toxic comments for this identity from toxic comments from the background distribution.\\n\",\n        \"* Positive cross AUC measures the ability to separate toxic comments for this identity from non-toxic comments from the background distribution.\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"AGb1CQn2PZVX\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"15595027-3db8-4526-a4ea-596691143f93\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1645\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"def plot_auc_heatmap(bias_metrics_results, models):\\n\",\n        \"  metrics_list = [SUBGROUP_AUC, BACKGROUND_POSITIVE_SUBGROUP_NEGATIVE_AUC, BACKGROUND_NEGATIVE_SUBGROUP_POSITIVE_AUC]\\n\",\n        \"  df = bias_metrics_results.set_index('subgroup')\\n\",\n        \"  columns = []\\n\",\n        \"  vlines = [i * len(models) for i in range(len(metrics_list))]\\n\",\n        \"  for metric in metrics_list:\\n\",\n        \"    for model in models:\\n\",\n        \"      columns.append(metric)\\n\",\n        \"  num_rows = len(df)\\n\",\n        \"  num_columns = len(columns)\\n\",\n        \"  fig = plt.figure(figsize=(num_columns, 0.5 * num_rows))\\n\",\n        \"  ax = sns.heatmap(df[columns], annot=True, fmt='.2', cbar=True, cmap='Reds_r',\\n\",\n        \"                   vmin=0.5, vmax=1.0)\\n\",\n        \"  ax.xaxis.tick_top()\\n\",\n        \"  plt.xticks(rotation=90)\\n\",\n        \"  ax.vlines(vlines, *ax.get_ylim())\\n\",\n        \"  return ax\\n\",\n        \"\\n\",\n        \"plot_auc_heatmap(bias_metrics_df, [MODEL_NAME])\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          
\"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"<matplotlib.axes._subplots.AxesSubplot at 0x7f4f00b24ba8>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 97\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAASkAAAZKCAYAAACEXpf4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xd8FWX2x/HPTSiSAiFA6EVAjnRE\\nQAFZitjXsq4oq1gWxbWLXekoCoIookixK7iyq7jq7k9FRRREQKQXj0qTKklICCGEkuT3x1ySCwQS\\nJLkzcz3v1yuv3Jm5IV8u5OSZ5848J5CXl4cxxnhVlNsBjDHmeKxIGWM8zYqUMcbTrEgZYzzNipQx\\nxtOsSBljPM2KlDHG06xIGWM8zYqUMcbTrEgZzxGRq0Xkg5DtmSJylZuZiuLHzH5hRcp40X3ATSHb\\nlwH3uxOl2PyY2ResSBkvigb2hmxHAQGXshSXHzP7Qhm3AxhTiBeAlSKyBueHvwkw1N1IRfJjZl8I\\n2CoIxotEJA5oChwEVFWzXI5UJD9m9gMrUsZzROQr4Kj/mKraw4U4xeLHzH5hp3vGi+4KeVwWOAeo\\n5FKW4vJjZl+wkZTxBRH5TFUvcDvHifBjZi+ykZTxHBG544hdtYIfnuXHzH5hRcp4UbWQx3lACnCx\\nS1mKy4+ZfcFO94wnBd8pSwxulgcmqOr5LkYqkh8z+4FdzGk8R0QGA8uBFcD/gEXAUldDFcGPmf3C\\nipTxootVtSGwWFVbAt2BHJczFcWPmX3BipTxojwRCQBlRKSCqi7GeUvfy/yY2Rds4tx40XtAf2Aa\\nsExEfgP2uBupSH7M7As2cW48TUTqAVWBpaqaKyKXq+qHbuc6Hj9m9jIbSRlPU9VfgV9Ddt0LePoH\\n3o+ZvczmpIzf+HH5Ez9m9gwrUsZv/Dg/4cfMnmFFyhjjaVakjN/48dTJj5k9w4qU8SQRKS8iDQo5\\n9Gy4sxSXHzP7gRUp4zki0hv4AfhvcHu8iNwAoKofu5ntWPyY2S+sSBkvuhNoCyQHtx8GjlwKxWv8\\nmNkXrEgZL8pR1f0UvCu2z80wxeTHzL5gRcp40VwReRuoIyKPAHOAz13OVBQ/ZvYFuy3GeE7wRt3O\\nQCdgP7BAVb9zN9Xx+TGzX9htMcaL1gGf4dy0O0tVc13OUxx+zOwLdrpnvKgpzsJxvYEVIvKKiHi9\\noYEfM/uCne4ZTxORJsBA4BpVPcXtPMXhx8xeZqd7xnNE5BzgMuACYAvwH+AhV0MVwY+Z/cKKlPGi\\n+4H3gSdVdZfbYYrJj5l9wU73jGccWhxORO6k8JblL7kQ67j8mNlvbCRlvCQh+LlqIce8+tvUj5l9\\nxYqU8QxVfTP4MEdVR4QeE5GxLkQqkh8z+42d7hnPEJErgb8BfwK+DjlUFjhDVRu4ket4/JjZb6xI\\nGU8JLnXyIjAmZHcusFpVU10JVYTjZF6jqimuhIogdjGn8RRV3aCqfwZScOZ08nBals92M9exiMhZ\\
nqroBmAjEhnzEAx1cjBYxbE7KeI6ITMK5gvt0YCHQDnja1VDH1g1YAFxVyLE84P/CmiYC2UjKeFFz\\nVe2Kc7p0Kc6IpJnLmQqlqoeKZz/gUVX9OzAKp4WVrSdVAqxIGS8qIyIVAUSkmqpuAlq7nKkoU4GO\\nwfmpfwPNgTeP+xWmWKxIGS96Abgm+HmFiGwCVrsbqUjVVfU/ODcYv6CqTwKJLmeKCPbunvE0ESkL\\nxKvqTrezHI+ILMC5NWYyzjzVQeALVW3nZq5IYBPnxnNEZF0h+3KAtcAAVV0c/lRFGoyzrvlIVU0R\\nkUHAeJczRQQbSRnPEZHHgHTgI5x3yC4GqgFfAc+o6jkuxjsmEWkItMK5RmpJcC7NnCSbkzJedJGq\\nTlTVLaq6VVVfAc5T1fluBzsWEXkY+BfQHaeofigit7ubKjLY6Z7xomwReQ74FmdU0g4oJyLnAZmu\\nJju2y4GzVDUHQETK4NwmM9HVVBHAipTxoquAG3BGJQGcuajLca7kvsbFXMcTwCmoh+RiqyCUCCtS\\nxnNUNUNEFgMpqvquiNQM3rfnyXv3gqYDP4jIdzjTKGcDU9yNFBls4tx4joiMAeoBjVX1TBEZBiSq\\n6j3uJju+4IWcbXBGUEtVdaO7iSKDTZwbL2qnqtcAGQCqOgw4w9VERRCR1sCzwJPBj9Ei0tzdVJHB\\nTveMF5UNXsSZByAiVQGvd115HRgCfIczP9UJ51YZTxdXP7AiZbxoLDAfqCcin+CsiHCfu5GKlKqq\\n/w3Z/khE+rmWJoLYnJTxJBGJxblJdx/wk6rudTnScYnIC0A08AXONEoXnNHfhwCqaku2/E42kjKe\\nE+z8+w+gEs6pEyKCqvZwNdjxxQU/X3rE/l7YulInxYqU8aJxQH9gs9tBiiu4jlShRMQu6DwJVqSM\\nF61T1c/cDlGCxO0AfmZFyniRisi/gLk4S544O/3baDPgdgA/syJlvGhX8KOy20FKiL07dRKsSBnP\\nUdXhxzomIh+o6l/Cmce4y644N36TUPRTPMdO906CjaSM33jy1ElE6uJccNoEJ+NqYJyqbgPOdzOb\\n31mRMqZkTAfeAabhjJw6Au8DnVT1gJvB/M6KlDElI1tVXwzZXiQiF7uWJoJYkTJ+k+Z2gGNYFFxC\\nOPS2mB9FpBmAqnq9JZdn2b17xnOCy54M5fD5ncdVdZWrwY5DRL46zuE8j9/S42k2kjJe5LtlT1S1\\nu9sZIpUVKeNFvlv2RESSKXjnsSwQD6xX1dPcSxUZ7HTPeE4kLHsiIq2APqr6sNtZ/M4u5jReFAdU\\nwFn25BKgIlAOZ9mTq1zMVWyquhznNNWcJDvdM140tLCdqvpruIMUl4j8m8MvNK0F7HEpTkSxImW8\\n6H0KfuDLAQ2BxUA3twIVQ+g1Unk4TSSWuZQloliRMp6jqu1Dt0WkBvCES3GKaxnOQn1tcBqDLgJ+\\nwbsdl33D5qSM56nqdqC12zmK8CbO6OlxYDSQg3MphTlJNpIyniMi31NwuhcAqgOfu5eoWOJV9dmQ\\n7fki8oVraSKIFSnjRaHv4OUBGaqa7laYYooWkXaqughARM7CzlRKhBUp41XDCZnfEZGhwWVPvOpO\\n4PngvXp5wMrgPnOSrEgZL3oVmAjcj/PuXrfgPi+vKtBeVc91O0QksiJlvChaVd8P2X7X67fFAOeL\\nyHeq+qPbQSKNFSnjRftFpBcwG2fivAdOJ2MvawesFJE9wP7gvjxVTXIxU0Swe/eM54hIbZy38tvh\\nzEl9D3h9TsqUEhtJGS+6QVVvdjvEiRCRWYXszgHWAqNUdUN4E0UOK1LGi5JE5DycEdShUydUNcu9\\nSEWaC5QHPsJ5d++i4P5VOBd12npTv5MVKeNFlwBXHLEvD+cePq/qcsTCd/NEZKaqDhaRO1xLFQGs\\nSBnPUdUmbmf4HcqLyL3Atzjz
aO2BqiLSEeu7d1KsSBnPEZF1hew+NL8zQFUXhzlScfTC6bs3DKco\\nrQWuxrnO61r3YvmfFSnjRS8D6RTM71wMVAO+AsYD57gX7Zi241wysY2C5hFrVdXePj9JVqSMF12k\\nqn8K2X5FRGap6kgRcS1UEf6FM9r7PrjdD/g7cI1riSKEFSnjRdki8hyHz++UC77j59X1mWqoaufQ\\nHSLytVthIondpW286CqcOZ1uwLlAMnA5oHh3ZLJQRPIX6xORMygYVZmTYCMp42V5OCOpLGCfqqa6\\nnOcoIa2sAsC9IpId3K4AbAEedDFeRLAiZbzoA+AH4Jvg9tnADOB81xIdg6pWcztDpLMiZbyo7BH9\\n6v4tIp5emTPYZv2od/KsvfrJsyJlPENEYoIP54SsgpCH0xzU65PQd4U8LotzmUQll7JEFFsFwXiG\\niKynYH7nSHmq6uXbYo4iIp+p6gVu5/A7G0kZz1DVU93O8HsVcn9ereCHOUlWpIznhIyoQuWqamM3\\n8hRT6AR6Hs5lE5e4lCWiWJEyXtQi5HFZnDkpz15qHjQO54JOFZFuOE0k9robKTLYnJTxheBtMZ59\\np0xEPgGexhlBvYFTtP6mqn92M1cksJGU8RwRGcPhp3u1gHiX4hRXeVWdLSLDgedU9R0R+bvboSKB\\nFSnjRStDHufh3MNX2PK8XnKKiFwH9AbaiUgD7BKEEmH37hkvmgWsUdU3cYpUd7z/TtkdQAfgdlXd\\njTNpPsjdSJHB5qSM5wRXD7gXOAV4ChgMDPHrNUci8oGq/sXtHH5lIynjRQdVdSnwV2Ccqn6Lv6cm\\nEtwO4GdWpIwXlRGRgcBlwMzgEihxLmc6GXa6chKsSBkv6oOzPMuVqpqN0yXmNgARKe9mMBN+fh5C\\nmwilqpuA50K2p4cc/gSn7br5g7CRlPEbP7aHSnM7gJ/ZSMr4jSfnd0SkIs5yLUmq2l9EugNLVDVd\\nVf/qcjxfs5GUMSXjDZwR06F1zpOAd1xLE0GsSBm/8erpXryqTgT2Q/48WgV3I0UGK1LGb1a7HeAY\\nokSkEcHTURG5EIh2N1JksCvOjeeISB1gCFBZVXuJSG/gO1Xd6HK0YxKRpsALOLfG7AGWAf1V9UdX\\ng0UAmzg3XvQK8DzwaHB7B86cT3e3AhXDucD1qrrN7SCRxk73jBdFq+onOD33UNVZeP//aiLwsYjM\\nEZH+wdGgKQFe/4c3f0wHRKQHEC0i1UXkNjy+yqWqPq6q7YBrgQPAZBGZ63KsiGBFynjRzTg/7FWB\\nz3CW4vX8AnLBa6U6Bj9qAkvdTRQZbE7KeNEDwCuqeovbQYpLRL7EKUz/BV5U1fkuR4oYVqSMFy0H\\nHhKRZjgjqfdU1eunTv1VdYXbISKRXYJgPCu44kFP4G/An1S1nsuRjnJoQTsRSebwW3YCOA1Nk1yK\\nFjFsJGU8KXjd0aXBjzxgvLuJChey4mbb4OoN+YIjQXOSbCRlPEdEFPgV+AD4wMvXHolIVaA68Bpw\\nEwW37ZTBOU1t4lK0iGEjKeMZIlJeVfcB5wEpIftjAFQ1y61sx9EU6As0AV4K2Z8LTHUlUYSxImW8\\n5HWcSw++pmB+59DIJA9nhU5PUdU5wBwRmaaqX4QeE5EbXYoVUaxIGc9Q1WuDD69W1e9DjwUv7vSy\\ndBH5N1AluF0OqAG86V6kyGBFyniGiDQGBHhKRB7l8Pmd8UADl6IVxwvAAJxW67cDfwHsWqkSYEXK\\neEkFoB3OgnFXh+zPBYa5EegEZKnqVyKyT1V/AH4QkU9xLu40J8He3TOeIyLNVXWV2zlOhIh8DLwM\\nXAVsBtYCD6iqXYZwkmwkZTwjpNPvbBHx24WR1+LMQd0F9AdaAde7mihC2EjKmBIgIjcUsjsHWGv3\\n8Z0cG0kZzwn+wJcF3gI+xnnH7FVVneRqsOM7F+gCfIlzuUQ34
Hugioj8rKp3u5jN12ypFuNFt+O8\\ndX81sEJV2wO93I1UpCpAC1Xtp6q3Aq2BU1T1QpylZszvZEXKeFGOqh7EmYSeFtx3iot5iqMeEBOy\\nXQ44TUQSgDh3IkUGO90zXrRYRH4BVFWXisjdOPfyedkYYImI7MI53UsERuCcBj7rZjC/s4lz40ki\\nUllV04KP6wHbVPWAy7GOS0QCOKuJBoBUVc1xOVJEsNM94zki0gb4QEQ2ichWnO4xjV2OdVwi0gJn\\ngb6PVHUHcLeItHU5VkSwImW8aDzOhZB1VbUWTg++CS5nKsoLwL1AdnB7Jh5dA8tvrEgZLzoYvLUE\\ngOB1Rl6flzioqmsObajqaoItuczJsYlz40XpIvIQMBtnfqcHsNPVREVLF5G+QKyInIVzg/EOlzNF\\nBBtJGS+6CeeSg4HAYzj/T73e0urvQC2cxfoeBdJx/h7mJNlIKsIFV7XsqaofBbevB2ao6h53kx1X\\nFrAMZ34nD1gV3OdZqpopIh/hLNgXhZO7LfCNq8EigBWpyPcuzq0ah1QA3gEudydOsbyDc5o3P/j5\\nFuBGoLeboY5HRP4HVMZZASF0NVErUifJilTkS1DV5w9tqOoUEfmbm4GKoY6qdgrdISJe/2GvfGRm\\nUzKsSEW+DBG5C/gW5zTkXGCXu5GKtFBE2h9aQlhEzsC5WdfL5vpxHSw/sCvOI5yIVAIexJkfOYjz\\nwz5eVTNcDXYcIrIZZxI6E4jGOUVNDR725LpSIvIzTqOIXTivsx/WwPIFK1IRTkT+FHwYOk+Cqnr9\\n9KlQInKFqv7H7RwnQkQuV9UP3c7hV3a6F/lC1zEqC5wBLMK/E7r3AL4qUjhXoluR+p2sSEU4VT1s\\nHabgJQmvuhSnJASKforn+DGzZ9jFnH88uYCfmwP4cX7Cj5k9w0ZSEU5EknF+SA79Ns8FJrqXyJgT\\nY0UqwqlqtSP3iUhPN7KUED+eOvkxs2dYkYpwInIqcAeHt//uCtR1LdQxBBe3OyZV/RWPrXLpx8x+\\nY0Uq8r0JvI7TC+5xnNthbnU10bG9j3NqWg6n3fo6nOukTgWWAGer6sfuxSuUHzP7ik2cR74Dqvo6\\nkK6q76vqDRx+WYJnqGp7Ve0ArABOU9Xmqno60AT40d10hfNjZr+xIhX5AiLSFUgVkVtF5Fyc3/Je\\n1kRVNx/aUNWNOD/0XubHzL5gp3uR73qgJs5FkI8Dl+DcJuNlC0RkIbAA51TqTGC5u5GK5MfMvmC3\\nxfyBicgHqvoXt3MURkSaUnA9l6rqSjfzFIcfM/uBFak/MBH5SlW7u53jSMFuMTcAlQh5+15V+7oW\\nqgh+zOwXdrr3x+bV31DTcDqtbC7qiR7ix8y+YEXKeNEmVZ3sdogT5MfMvmBFynjRDyIyBpiDszYT\\nAKr6f+5FKpIfM/uCFak/tjS3AxxDreDn0En9PMDLP/B+zOwLVqQinIi0BobiXLOTB6wGHlfVVar6\\nV1fDHdtQtwP8Dn7M7AtWpCLf6zhtyr/DedepEzAVZ/E7rzp0qwk4t5s0BBYD3dwKVAx+zOwLVqQi\\nX6qq/jdk+yMR6edammJQ1fah2yJSA3jCpTjF4sfMfmHXSUU4EXkB54bXL3Bug+qC0x34Q/DPxK6I\\nLAzeI+cbfszsRTaSinxxwc+XHrG/Fx6d2BWR7yk4dQoA1YHP3UtUND9m9gsrUpGv0And4DpHXnVV\\nyOM8IENV090KU0x+zOwLVqQin18ndIcDbXCWO14kIkNVdZvLmYrix8yeZ0Uqwvl0QvdVnHXY78cp\\nrN2C+y52MVNR/JjZF6xI/cGo6vbgtVNeFq2q74dsv+v1dyTxZ2ZfsCIV4Xw6obtfRHoBs3Ey9wD2\\nuZqoaH7M7AtWpCKfHyd0++Is0DcIZ37ne+BmVxMVzY+ZfcGK1B+D3yZ0b1BVv/2A+zGzL1iRin
x+\\nnNBNEpHzcEYj+w/tVNUs9yIVyY+ZfcGKVOTz44TuJcAVR+zLw7l8wqv8mNkXrEhFPt9N6Kqq77qs\\n+DGzX1iRiny+m9AVkXWF7M4B1gIDVHVxmCMVyY+Z/cKKVOTz44Tuy0A68BHOKdPFQDXgK5x1xM9x\\nL9ox+TGzL1iRinx+nNC9SFX/FLL9iojMUtWRIuJaqCL4MbMvWJGKfH6c0M0WkeeAb3FOUdsB5YLF\\nNtPVZMfmx8y+YEUqwvl0QvcqnB523XEm+9cCl+EsO3ONi7mOx4+ZfcGKVITz6YTuXar61KENEUkC\\npqjqVcf5Grf5MbMvWJGKfH6c0I0TkbeAW3AW5xsEDHM1UdH8mNkXbPngCCci3xwxoUtwQreHiMxV\\nVS8WKUTkKmAUsAroq6qpLkcqkh8z+4EVqQgnIjNxfmgOTei2x1nnfDjwgKpe6GK8wwSba4b+h2wB\\nNAD+C6CqD7sQ67j8mNlv7HQv8h2a0O1GwYTu5UAs3pvQXXnE9ipXUpwYP2b2FStSfxx5OCOpLGCf\\nF09FVPVNABGpBVyqqpOD248Bb7gY7Zj8mNlvotwOYErdB0A94BtgDnAaMMPVREV7k8NbwC8P7vMy\\nP2b2BStSka+sqj6sqv8OfjyAc9rnZRVU9V+HNlT1fzjLzHiZHzP7gp3uRSgRiQk+nBOyCkIezqT5\\n127lKqaNIvIMzmR/FM7KDRvdjVQkP2b2BStSkWsVTlEKANcecSwPGBH2RMV3Y/CjJ86Fp/OBd11N\\nVDQ/ZvYFuwTBeJKINAeqBDfLA8+qaksXIxXJj5n9wEZSEU5E1nP4dTwAuara2I08xSEik4CmwOnA\\nQuBMYLSroYrgx8x+YRPnka8F0DL40Ra4F5jkaqKiNVfVrsAaVb0UOAto5nKmovgxsy9YkYpwqron\\n5CNdVT/G200YAMqISEUAEammqpsArzc09WNmX7DTvQhXyG0bNYF4l+IU1ws4V8O/AGwWkR14v6Gp\\nHzP7ghWpyLcbp/HCdpxidSFwnauJilYeGALsAn4B6gJfuJqoaH7M7At2uhf5zgU+AxS4CbgdeMbN\\nQMXQH2itqi1UtTnQCPD6jbp+zOwLVqQi30FVXQr8FRinqt/i/RH0Zpw1sA5Jwbkx2sv8mNkXvP6f\\n1Zy8MiIyEGcp28Ei0h5nSVvPCZk/2wssEZG5we2OwI9uZjsWP2b2GytSka8PznItV6pqtog0BG5z\\nOdOxHFr25MjlTr4Pd5AT4MfMvmJXnBtjPM3mpIwxnmZFyhjjaVakjDGeZkXKGONp9u6eRzWoFJv3\\nyy2eaeRyXI1f+ZTcfQdY0eF0t6MUW+uNziVNG1YvdznJCYpN8PqqqiXORlLGGE+zImWM8TQrUsYY\\nT7MiZYzxNCtSxhhPsyJljPE0K1LGGE+zImWM8TS7mDPCRF12E4H6TYA8cv7zGmwKrrtWMZHo6+4t\\neGKV6uT+byp5S+YS6HYZUW3/BDk55MyYUvA1YXDKXQ8R3awVkEf2+KfJ+bFgxZNAUnVihjwNZcuS\\n89MasseOgECACg8MJqphYzhwgL1jnyD31w1hy3ukn35Zyx33P8RN1/6NPr17HXZs3oKFPPviRKKj\\novjTOZ24s9/NLqX0NxtJRZKGzaBaTXJeGEDO9JeIviLkhyJjJzkThzofk4dDegp5qxZB9bpEtelM\\nzriHyXlvElHN2oUtbnTrM4mqU489d1zP3qeHcso9jx52/JQ7HmTf9LfY84/rIDeXQFINypzTHeLi\\n2HPHDc7X3PFA2PIeKWvvXp4Y/Qwd2xf+mo0YPZYXxozin6+/zLffLeCXdevCnDAyWJGKIFGntSJv\\n5UJnY8cWiImD8hWOel6gfXdyl8+H/dkEmp1J7rJ5kJsLW9aT+9n0sOUtc+ZZHJjzFQC5G9cTiK8I\\nMbHBkAHKtG7LwW9nA5D9
3FPk7dhOVJ365Kxx1pnL3bqZqBq1IMqd/8blypbl5fHPkVSt2lHHNm3e\\nQqVKFalZozpRUVF0PacT3y1c5EJK/7PTvWMI9lCbAZwCzAKuBwJAC1XNFJFncFZlfBuYAjQEygJD\\nVHWWiDQDXsRZSnY3ThOEBGAqkAm8qKr/LdHQ8QmwOeRULXOXs2/f3sOeFnVWT3ImPw5AIDHJGaX0\\nGwTR0eR++AZs21iisY4lKrEqOT+tzt/OS08jKrEquVl7CCRUJi9rj3M62KQpB5cvZt+U8eSu+5ly\\nV/dh/7+nElW7HlE16xColEBe2s6wZA5VpkwZypQp/EcoOTWVxMqV87cTExPZtGlzuKJFFBtJHdv1\\nwFJVPQdYjVOgCnMtsE1VuwNXAOOC+18A/qGq5wIzgTuD+88ArivxAlWYQCGR6zchb8eWwwtXVBS5\\nL48g97PpRF99R6nHOqbQuIEAUVWrs/+9aey5py/RpzWlzNldOLhgLjlrVhL7wuuU69WHnI3rCv97\\neo2tgPu72Ujq2JoCs4OPZx/7aXQCuojIOcHtCiJSDugAvCwi4PRkO7Tm9VpVTS3xtAAZO52R0yEV\\nE2F32mFPiWrWjryfQu78z9zlFC2A9T9C4tGnLqUlNzWZqMSqBdmqJpGbmgxA3q50cn/bSu5WZ/Rx\\n8IcFRJ3aGObPYd8rL7Iv+DVx//yfK6OooiRVq0pKSsE/8287kgs9LTRFs5HUsQUo6Px7MPg59Ndh\\n2eDn/cCTqtot+HGaqu4HsoDuwX0dVfWekOeXityflhFo3dHZqH2qU7T2ZR/+pLqNyNu6oeBr1iwm\\nIG2cjaTakF469bMwB7+fR5mu5wEQ1aQpuSk7YG+WczAnx5lzqlMPgGhpSu6m9UQ1akKFR4YDUKZD\\nZ3J/WuPJUUqdWrXI3LOHzVu3cvDgQb6aM5fOHc9yO5Yv2Ujq2H4EzgLeB3oG92UANUVkHXA2sARY\\nAFwO/FNEkoD+qjoAWIbTLfgTEekNJFPafdg2KGxeR/TdT0JeHjnvv0ygfXfYuyd/Qj1QsbIzV3XI\\nrz9D07ZE3/0UADkzXi7ViKFyVi4j56fVxL70FuTmsve5pyh74WXk7cnk4JxZZL8wmgqPPQFRUeSs\\n+5mD337tfGFUFLGTp8H+/WQ98ejxv0kpWrl6DU8/N54tW7dRpkw0n305ix5du1CnVi3O69GNYY89\\nwgOPDQbg4vN7cmr9eq5l9TPrFnMMIpIAfABEA3Nx5p6eBB7A6QacCnyDMxE+CWgWfO4wVf1ERJri\\nTKjn4vRkuxaoCLynqkW+z2+L3pUuW/TOP2wkdQyqmg50BxCROOBaVX0ZKGyocUshX78G6HLE7p1A\\n+C5EMiYC2JyUMcbTbCRVDKqaCTRwO4cxf0Q2kjLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSlj\\njKfZJQheFVeRqL/fW/TzvODdeUTFB6j45ltuJym+LhcDkLctfKuQloRA4zPdjhB2NpIyxniaFSlj\\njKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKfZxZwRZuTr01n20zoCgQAD+l5Dy8an\\n5h/7cuFSJr33X8qVLcvFndtz3cU9ABjz1nv8sOZncnJy6HflxZx/dtvw5Z38JkvX/EwgEGDgbTfS\\nUhoX5P3ueya+M8PJ260TfS67kL3Z+3hs7EukpO9i//4D3H7tlXQ/K7wXOI6c8jZL9WcCBBj4jxto\\n2aRRSOZFTJz+H8qVLcPFf+pIn0svcDI/N6kg89/+QvcO4XuN/c6KVBFE5CachqAPFnKsHlBDVReK\\nyDjgeVVdH+6MhyxcpWzc9hvvjnyMtZu3MXDCG7w78jEAcnNzGfHKO7w/ZjAJ8bHcOuJ5zj2rDRu3\\n7uDnX7fw7sjHSNudyV8ffCI8WTlQAAAgAElEQVRsRWrh8
tVs2LKd6eNGsPbXzQx4dhLTx43Iz/vE\\nhNeZ8eIoEirG0W/QSHp2bM/i1UqLJg25pdflbPktmb4DngxrkVq4Yg0btm5n+tjHWfvrFgY8P5np\\nYx8vyDzpDWaMf4qE+Dj6DX2anh3bsXj1T7Q4rSG3XHUpW3Yk03fgSCtSJ8CK1MnpAcQBC1W1v9th\\n5i//kXM7nAFAozo1ycjMIjNrL3ExFUjbnUl8bAyJleIBOLtVU75btobLunak5WnOaKtiTAxZ2fvI\\nycklOrr0ZwLmL11Jz47Oku+N6tUhI3MPmXuyiIuNIS1jN/FxMSQmVASg4xktmLdkBVee3y3/67cn\\np1KjamKp5zx25tpO5qws4mKCmWNjSawUzNy6BfOWrOTK87q6mtnvrEgVk4g8i9Pw8xSc7jAfAsOA\\nAyLyK3A/cBdwFVAJEKAR0B/4BaerTEfgVGA60FFVD5RkxpT0XTRvVD9/O7FSHMnpu4iLqUBixXj2\\n7M1mw9bfqJ1UhYUrlfbNmxAdHUVMdHkA3v9yLn9q2zIsBQogOS2d5qcVnI4mVqpIclo6cbExJFaq\\nyJ6sbDZs2Ubt6tVYsGw1HVo1y39u7/sG81tKKpOGPxKWrIdlbnxk5l3ExQQz791bkHn5ajq0bFqQ\\n+YGh/Ja6k0lDHwprZr+zIlV8G1T1fhGpgNOF+BUReQNIUdWPROT+kOfWVdWLReRC4DZVvUJEPgH6\\nAhcA95Z0gSpMaLeyQCDAyLv/zqCX3iAupgK1k6oe1ur0y4VLeX/WXF4Z7N6AMLS9WiAQYNSDdzDw\\n2UnExcZQp0a1w46/+9wTrFm7gYdGv8iHE0cTcKnV+lGZ77+dgc9PIS4mhjrVqx32b/Du2OFO5mcm\\n8OGLo1zL7DdWpIovUUTm4XQgLqpf9tzg5804oyqAkcC3wDJV/bY0AiYlJpCSXtD4c8fOdJIqF7Rd\\n79BcmDrCGXk8O3UGtZKqOGGXrGTy+/9jyqD+xMfGlEa0Y+StTPLO9JC8aVRLrFyQt1Uzpo11uhWP\\nfe0dalevxsqf11EloSI1q1WlaaMG5OTmsHNXBlUSKh3155dK5iqVSQ59jVPTqBb6GrdsyrTRQ53M\\nb7xL7epVg5krUbNaFSdzTm5YM/udXYJQPGfizD91VdVuwL4inn8w5PGhX5cxwcfVSzxdUOfWzfjs\\nux8AWLVuI0mJCcRWOCX/+K0jnid1VwZZ2fv4atEyOrVqyu49WYx56z0mDribhPjY0opWeN4zWzFz\\n7gIn78/rSEqsTFxMhfzj/QaNJDV9F1nZ2cxe8AOdzmjJohVreP39/wKQkpZO1t5sKleMD1/mM0Iy\\n/7KepCpHZB7ydEHmhYvp1KYFi1b+yOsf/C+Y2TkWzsx+ZyOp4mkAzFPVAyJyGRAtIuVwuhMX9zUc\\nCQwFLhKRa1R1ekmHPOP0xjRvWJ+/DRhFVCDA4H7X8sGsb4mLrcB5Z7Xlqp5duOXx5wgEAtx65UVU\\nrhjPv2Z+Q9ruTO4bOzn/zxl1d19qVatS0vGO0raZ0Py0hvS+bzBRgQBD7urLjJmziY+N4bzOHeh1\\nYQ9uHvCkk/eaK6hcqSK9LzmPgc9N4roHhpK9fz+D77yZqKjw/a5t26wJzRufSu8HhhIVFWDI7X9n\\nxudfO5k7tafXBd25edAoAgG4tddlTuaLezLw+Slc9/BwsvftZ/DtN4U1s99Zm/UiBC9BaA10xmmX\\n/h+gE5ABvAu8CTwE9KNg4jxFVV8UkRbAi8AjOO3XLxKRKsDXQCdVzTjW921Qu2beus9KvI6VioYX\\nXAOBAOvn/J/bUYrt1OCid+tnf+xykhMTaHzmH24iy0ZSRVDVNwrZ/VzI41rBz9OCn1eGfO1KoFtw\\n86LgvlSgRYmGNCaC2ZjTGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjjaVakjDGeZkXKGONpVq
SMMZ5m\\nF3N6WKBsObcjFF8gANFl3U5x4vJy3U5gimAjKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKdZkTLG\\neJoVKWOMp1mRMsZ4mhUpY4yn2RXnEWbkK9NYqmsJBAIM7HcdLU9rmH/sy/mLmfivjyhXtgwXdzmL\\nPn8+D4Axr09n0WolJyeXW6/6M+d3ahe+vBNfY+man5y8d/SlpZxWkHfeQiZO+zflypbl4m7n0OeK\\ni/OPZe/bx6X9+nP7db248oIeYcsLMPLlqSz98Rcn8619aNmkUUHm+T8wcfp/nMxdzqbPpeezN3sf\\nj42bTEp6Bvv3H+D23lfQPdhp2hTNRlLHISI3icgzxzhWT0Q6BB+PE5FTC3teOC1c+SMbtv7G9DFD\\nePLumxkxZWr+sdzcXJ6Y8jZTht7P1JED+Or7pWxP2cn85Wv4+dfNTB8zhJeHPcjIV6Yd5zuUcN5l\\nq9iwZRvTx4/iyfvvZMSEVw/P++LLTHlyEFOfHcFX8xexPTkl//jEae9RKT4ubFnzM69Yw4at25k+\\ndhhP3nsLIya/fXjmSW8yZdhDTB01iK8WLmF7SipfLVxCi8YNmTpqEM89ejejwvgaRwIbSf1+PYA4\\nYKGqutf2N8T8ZavpeXZbABrVrUVGZhaZWXuJi6lAWkYm8cH25QAdWzVj3rJVXN6tM62aOKOtirEx\\nZO3bR05Oblharc9fspyenTo4eevXISNzD5l7soiLjSFtVwbxsbEkBhtodjyjJfMWL+fKC3qw7tfN\\nrP11E13POrPUMx6Vedkqep7tfN9GdWuTsWcPmVlZxMXEkJax+/DXuHVz5i1dxZU9/5T/9duTU6lR\\nNTHsuf3MilQxiMizQAfgFGAS8CEwDDggIr8C91PQzqoSIEAjoL+qfiIiDwSPRQH/p6rDSyNnclo6\\nzRs1yN9OrBRPctou4mIqkFgpnj17s9mwdTu1k6qyYMUaOrRsSnR0FDHR5QF47/Ov6Xpm67AUqPy8\\nIadKiZUqkpyWTlxsDIkJldizdy8bNm+ldo0kFixbSYdWzQF4evIbDL6rHx98/lVYch6eeRfNGxcM\\nmhMrHnqNneK0Z282G7Zsp3b1qixYsZoOLZvmP7f3g8P5LXUnk4Y8EPbcfmZFqng2qOr9IlIBWKuq\\nr4jIGzj99T4SkftDnltXVS8WkQuB24BPgvvPwWkmuk5Enjtez72SEtpTMRAIMKp/PwaOf5W4mArU\\nqV7tsONfzl/M+198w6vDHyrtWMd0VN6H7mHg2AnExcZQp0YSecB/Pv+KNs2EOjVLrRH0CQntWhkI\\nBBh13z8Y+PwUJ/MRr/G7zwxlzbqNPDR2Ih++8BSBwB+uhd7vYkWqeBJFZB6wH6hWxHPnBj9vxhlV\\nAWThNAQ9CFQFEnGai5aopMTKJKfvyt/esTOdapUr5W93aHE600YNBGDsm/+idlJVAOYsXsGkf3/E\\ny8MeJD42pqRjHTtvlcok70wryJuaRrXEygV5Wzdn2nNPOnlfnUrt6kl88e18Nm37jdnzF7E9JZVy\\nZctSo1oVOrVtHZ7MiQkkp6UfnrlyQkHmlk2ZNnqIk/mN6dROqsbKX9ZTpVJFalarQtOG9cnJyWHn\\nrgyqJFQ66s83R7OJ86KdiTP/1FVVuwH7inj+wZDHARGpj3M6eGHw6zeWRkiAzme0YOa33wOwau0G\\nkhITiIupkH+837BnSE3PICt7H7O/X0qnNs3ZvSeLMa+/y6TB95MQ5onozme2Yeac75y8P68lqUrl\\nw/MOeILUtHSy9mYze/73dGrbiucGPch7E8Yw/YWnueqintx+Xa+wFSiAzm1bFrzGv6w/OvPQ0aSm\\n7yIrO5vZC5fQqU0LFq38kdc/cLo7p6TtIit7H5Urxocts9/ZSKpoDYB5qnpARC4DokWkHM6pW3Fe\\nv6rADlXNFJG2QH2g
VFaza9v0NJo3bkDvh58gKhBgyG03MOPLOcTHVOC8ju3odX43bh46hgBw61V/\\npnLFeKZ/+hVpuzPpP3pC/p/z9H23UqtaldKIeHje5qfT/LRG9L73MSfv3f2Y8dks4mNjOO+cs+l1\\n0Xnc/OjjBAIBbu19JZWDE9Juatu0ifMaPzicqKgAQ267kRlffOO8xp3a0+uC7tw8+Gknc69LqVwp\\nnt4XncvA8S9z3cOPk73/AINvu5GoKBsfFFcg9JzZHE5EbgJaA52BvcB/gE44p2rvAm8CDwH9KJg4\\nT1HVF0WkBfAicC7wfzjvBM4FooE2qtrzeN+7Qe2aeetnfVAKf6uSd2qPv0BUFOu/nel2lGI7tZNz\\njdj6rz50OcmJCZzW/g83kWUjqeNQ1TcK2f1cyONawc+HLnxZGfK1K4Fuwc0LSjqbMX8UNuY0xnia\\nFSljjKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSljjKfZxZxeFQhA+QpFP88LAgHIy4MD\\nRd3W6CGHViCIdf9WG3N8NpIyxniaFSljjKdZkTLGeJoVKWOMp1mRMsZ4mhUpY4ynWZEyxniaFSlj\\njKfZxZwRZuSkN1j6488ECDDw9ptoKY3zj30573sm/nOG02a9a2f6XH6h0wL8mQmkpO9yWoBf+1e6\\nnx2+ppsjp7wVbFkOA/9x4+Ety79bxMR3P3BalnftSJ9LL3DyPjuxIO/frqT7WW3Dlhdg5IRXWLpa\\nnTbrd/Wj5ekhreHnzmfi1H85mXt0oc9f/syCpSvoP+xpGjeoB0CThvUZfM8/wprZz6xIHUdwjfMW\\nqvpgIcfqATVUdaGIjAOeV9X14c4YauHy1WzYsp3p455k7a+bGfDsRKaPc1pC5ebm8sSE15gx4WkS\\nKsbRb9BIenZqz+JVSosmjbjl6svZ8lsyfR8bEbYitXBFMO+zj7P21y0MGDeZ6c8+XpB34uvMeGEk\\nCfFx9BvyND07tmPxqp9ocVpDbul1mZN34FNhLVILl65kw+atTJ8whrUbNzFg9HimTxhTkHn8FGZM\\neY6EivH0e2Q4Pc85G4D2rVswfvijYcsZSaxI/X7ea7O+ZAU9O7UHoFG9OmTsDmlbnrGb+LgYEhOC\\nLcDbtGDekhVceX63/K8Pdwvw+UtX0bNju2De2k6b9cNalsce3rJ8yUquPK9rQd6U8Lcsn794WX7h\\naVS/Lhm7Mw9vDR8X0hq+bSvm/bCM2jWSwpox0liRKoYSaLN+dfA5B4EfVPXe0siZnJZO89Ma5m8f\\n1rY8vwX4NmpXr8aCZavo0KpZ/nN79x/EbympTHo8fL/tk9PSD29ZXime5J2hLcv3FuRdvvrwvA8M\\n4beUnUwaFt6Oy8k70w5vDZ9QieSdaQWt4bNCWsMvXUGHNi2pXSOJtRs3cfvAEezK2M2dN/amc7sz\\nwprbz6xIFc/vbrMuInOAp3DaWGWKyMci0l1Vvyrt0Hkc0bb8wTsZOHbiYW3LD3l33AjWrN3AQ6Nf\\n4MOJY1xpAR7aXS0QCDDqgdsZOG4ycTEx1KlxRMvysY87ecdM4MMJT7vWsvyo1vCP9mfg6PHB17g6\\neXl5NKhdiztv6M1F3c9h09bt3Hj/QD6bOplyZcu6ktlv7N294jnUZv0TTrzNehPgZ1XNDO6fDZTK\\nr9GkKpWPbgEe2ra8VTOmPfs4k594lLjYGGpXr8bKn9exbUcKAE0bNchvAR4OSYlH5N2ZRrXE0Jbl\\nzZg2ZhiThz9MXExI3uTUkLy5YcsLkFQ1keSdoa/xTqpVCXmN27Rg2vhRTB45hLi4GGrXSKJ6tSpc\\n3KMLgUCAerVrUjWxMjtSUsOW2e+sSBXtpNqsA3nBz4cc6n5c4jq3bc3MOfMBWPXzuqNbgA98qqAF\\n+Pwf6HRGSxatWM3r7/8XgJRgS/NwtQDv3LYVM+cuc
PL+sp6kxCPyDh4V0rJ8cUHL8hkhebPDlxeg\\nc7szmPnNt07mn9aSVCWRuJiYgsyPDCtoDT/vezqd2YaPP5/Nq9OdRq/JO9NITUsnqWrpd4iOFHa6\\nV7QGnFyb9Z+A00QkXlV3A12BEaURtG1zoflpDendf5DTAvzOm5kxc7bTtrxzB3pddC43PzYi2Lb8\\nCipXqkjvS85n4LMTue7+IWTv38/gu24OWwvwts2aOHkfGEJUIIohd/ydGZ9/7eTt1J5eF/bg5kEj\\nnbbwV1/u5L24JwPHTea6h4aRvW8/g+/4e1hblrdt0ZTmTRrT+66Hndbw997GjE+/dDJ36UivS87n\\n5oeGEgjArddeReVKFeneuQMPjhjLrG8XcODAQYb2v91O9U6AtVk/jpJos66q3UTkSuABnMI2V1Uf\\nK+p7N6hTK2/93E9K/i9VCk495yIA1n/9X5eTFN+p3S4FYP2CWS4nOTGBWvKHa7NuRcqjrEiVLitS\\n/mFzUsYYT7MiZYzxNCtSxhhPsyJljPE0K1LGGE+zImWM8TQrUsYYT7MiZYzxNLstxqsCAShT3u0U\\nxRMIwMED5G5b53aS4gs4v58Dp4Tvvj/z+9hIyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwx\\nnmZFyhjjaVakjDGeZhdzRpiRL73C0tU/OW3L7zyiBfi3C5g4LdgCvHsX+lxxidMC/PHRBS3AT63P\\n4LtvDVveUe/+j2VrNxEIBHjsb5fQ8tQ6+cfemTWfj79bSnRUFM0b1Oaxv11C1r79DHj1PVIyMokp\\nX44n+/6VapXCe0HmU+MmsGzVagIEGHDfXbRqdnr+sS++mcvEN6ZSrmxZLunZgz69/sKerL088vhI\\ndu3ezYH9B7jz5hvocnaHsGb2MytSRSis1bqIbAjuyzzGlx35ZzQA3lPVdqUQMd/CZSvZsHkb018c\\n7bQAH/MC018cDQRbgL8wmRmTgi3AHxtOz85nAdC+VXPGDwt/C/DvdT0bf0vlnwNvY+3WHQx6fQb/\\nHHgbAJl7s3nt0zl8OvJ+ykRHc8vY11m29leWrt1E3WqJjLvjWhb9tIEX//MFw2/8S9gyL1y8lI2b\\nNjP95Qms3bCRAU+OZvrLE4Dgazx2PB+8MYWEShXpd/8j9Ox6Dl98PZdT69XlgTv68VtyCjfedT+f\\nTn8rbJn9zk73Isj8xcvyC0+j+nXJyHRagAPBFuBxJCZUIioqio5ntGbeD8vcjMv8NWs594ymADSq\\nlURG1l4y92YDULZMNGXLRJO1bz8Hc3LI3n+ASrExbPwtJX+01a5JA374eWNYM3+3aDE9u57jZG5Q\\nn10Zu8ncsweAtPRdVIyLI7FyAlFRUZzdri3zvv+BygmVSM/YBUDG7t1UDrZhN8VjI6niOVVE/g+o\\nCzx3aKeItAYmAAdwOsH0UtWdIvIwTueYXOAxYH3I11wE3A1cqqo5JRkyOS2d5k0a528nViqiBXjr\\nFgUtwAeNYNfuTO68vjed27UpyVjHlLJrN83q18rfrhwfS8quTOIqnEL5smW547IenP/IWE4pV4aL\\nOrSiQY2qNKlTg29W/MT57Vrwva5nW2r6cb5DKWTeuZPmpzfJ306snEBy6k7iYmNJrJzAnqwsNmza\\nTO2aNVjww1I6tG3Drdf/jRn/+5TzrrqOjN2ZTB47MqyZ/c6KVPE0AdoCFYFlwKHikgTcrapLRORx\\n4DoR+RSnQJ0NNAQeJdhnT0QaA4OBi0q6QBXmqDbrj9zLwDEvOC3Aa1Ynj5AW4N3OYdO27dz4wCA+\\ne2uSO33hQhoXZe7NZsr/vuaTp+4jtkJ5+o55jR83bePKLmeim7fTZ+QU2kkDEivGhT9niKParA9+\\nlAFPjiY+NpY6tWpAXh4ffvo5tWok8eq40fz48y8MeGoMM16f7GJqf7EiVTxzVfUAkC
oiGUC94P7f\\ngKdFJAaoBUzDaaG+QFVzgV+AW4JzUrE4fftuUNVdpREyqUoiyTvT8rePagHeugXTnnd+i4995S1q\\nVw+2AO/eBYB6tWpStXJldqTspE7N6qUR8TDVEiqSsqtgWm9Hekb+JPjabcnUrVaZyvGxALRtUp9V\\nG7Zwet2aDL3+cgD2ZO9j1pI1pZ4zVFLVqqSk7izInJJKtSoF3Yg7tG3DO5PGAzD2pZepXbMGC5cs\\n45yz2gNw+mmN2ZGSSk5ODtHR0WHN7lc2J1U8RzYnPLT9PPC8qnYFDv1qzKHw17UOMAe4o1QSAp3b\\ntWHmN/OAY7QAf3R4QQvw74ItwL+Yzav/OrIFeGJpRTw8b/PGzPxhJQCrN24hKaEisRWc5WlqV6nM\\n2m3JZO8/4Px9NmyhfvWqfL1cGf/B5wB8PH8pXVo2KfwPL63MHdrx2VffOJn0J5KqViEutuA1vuW+\\nR0jdmUbW3r189e08OrY/k/p1arNslVNMt2zbTmyFClagToA1By1C8N29e4F2QCKwKHioOfAdcCXw\\nK/AJMB+YgjNiagdUASYB9wHvAe2BWcBIVZ15vO/boG7tvPXffXnCece+/CbfL19FVFQUQ+75B6t/\\nXkd8XAznndORmXO+46W3pxMIQN9eV3Bpz25kZmXx4JPPsjtzDwcOHuDOG3rT9awTexPy1I7nwsED\\nrP33Syec99n3PmPRTxuICgQY1OdS1mzcRnxMeXq2bc702Qv54NvFlImKok3jejzY60Ky9x+g/0vv\\nkL5nL5ViK/DMrdcQH3PKCX/fRtfeB8CGpQtO+GufeWkKi5YsJxAVYOiD97L6p1+Ij43lvG5dmDn7\\nGya89hYBAvS97mouu+A89mTtZcCTT5O6M42DOTnce2tfOrZre8LfF4DEWn+45qBWpIoQLFIXAOWB\\nxsBonDmmFsC1OAVsLfA68CJwMdAT+CsQAAbgTJy/p6rtRKQR8DFwlqruPtb3/b1Fyg0nU6TccjJF\\nylVWpIxXWJEqXVak/MPmpIwxnmZFyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjj\\naXaDsZdF+ei6vbw82L/P7RQmAtlIyhjjaVakjDGeZkXKGONpVqSMMZ5mRcoY42lWpIwxnmZFyhjj\\naVakjDGeZhdzRpiRE15h6WolEAgw8K4j2qzPnc/EqcE26z260Ocvf3barA97uqDNesP6DL7nH2HL\\nO+rfn7Fs/WanzXqvC2jZoHb+sXdmf8/HC5c7bdbr1eKxqy9gR/puBr39EfsPHiQnN49Hrzqf5iG9\\n+8LB2qyHlxWpCLJw6Uo2bN7K9AljnDbro8czfcIYINgCfPwUZkwJtll/ZDg9zzkbgPatWzB+uAtt\\n1n/awMYdO/nnwzezdlsyg97+iH8+fDMAmXv38drn8/j08bspEx3FLeOnsmzdZj5bsppz25zONV3O\\nZMnaTTz/0VdMufu6sGW2NuvhZ6d7EWT+4mX5hadR/bpk7D6yzXpsQZv1tq3cb7Ou6zm3tQDQqGY1\\nMrKyydzr3FpzeJv13GCb9QpUjoshPdP5O2Vk7SUhLuaYf35psDbr4WcjqZMgIpVwWlVVAP4P6AcM\\nwmmjngOsUtVbRWQBcK2qrhWROsCHqnpmSedJ3plG8yaN8rcTE4pos96mZUGb9YEj2JWxmztv7E3n\\ndmeUdLRCpWTsoVm9kDbrcTGkZGQSV6E85cuW4Y5LunL+4PGcUrYsF7VrToPqVbixx9lc8/SrfLRg\\nOZnZ+5j6wN/DkjU/s7VZDzsrUifnBmC1qt4rInfgtLCKBS5U1XQR+UZEWgJvA9cATwGXAf8MR7ij\\nWoA/2p+Bo8c7bdZrVCcvL6TNevdz2LR1OzfeP5DPpk52p816iMy9+5jy6Vw+GX4XsaeUp++4t/hx\\n83ZmL/+JC85s
xm0XdWH2ip8YM+Nzxv/jatdyWpv10meneyenKfBt8PFHwc87gQ9F5Ovg8So4RenK\\n4PE/U0pFKqlqIsk70/O3j2qz3qYF08aPYvLIIcTFxVC7RrDNeo8uBAIB6tWuSdXEyuxISS2NeEep\\nVimOlIzQNuu7qVYpDoC125OpWzWBynExlCsTTdvG9Vj16zYWr91El2bOaLHT6Q1ZuXFrWLIeUtw2\\n65PHjiQ+No7aNWuwePnKQtusm+KxInVyAkBu8HEeUA6YAFwTbL2+AEBVU4HNItIeiFLVLaURpnO7\\nM5j5jVMzC22z/siwgjbr84Jt1j+fzavTj2yzXqXQP7/E8zZtxMzFqwFY/es2khLiiT3lUJv1BNZu\\nTylos75xK/WrJVIvKZHlG5yXb8XGrdRPCk9L+PzM1mY97Ox07+SsxWmn/h5wERAPZKjqdhGpGzxW\\nLvjct3EK2JTSCtO2RVOaN2lM77seJioQYMi9tzHj0y+Jj43hvC4d6XXJ+dz80FACAbj12quoXKki\\n3Tt34MERY5n17QIOHDjI0P63h+1U74xGdWlerybXjnnNabPe+yI++G4p8RVOoWeb0+l7Xidueu4t\\nykRH0aZhHdqdVp/6SYkMevtjPv3BKW4Drr4wLFkPaduqBc1Pb0Lvfnflt1mf8b9P89usX335JfTt\\n/xABAtx6w7UkJlTimisuZcCTT9Pn9ns5mJPDsIfvC2tmv7MOxidBRKoCHwIHgM+BvsAcoDmwDFgN\\n3Ay0wRl1bQcaqmp6oX9giAZ1a+etXzCrlJKXrFPP6gEH9rP2nXFuRym2Rjc5l1xYB2Pvs5HUyYkF\\nHlfVz0SkI9BVVW864jnPAohId+Dj4hQoY0wBK1InZxdwv4gMwRkp3VPYk0RkOHAB8NcwZjMmIliR\\nOgnBUdEFxXjeUGBo6ScyJvLYu3vGGE+zImWM8TQrUsYYT7MiZYzxNCtSxhhPsyJljPE0uwTBlIwy\\nZYmq26To53mGc6dF3sH9Luc4MX+4y82xkZQxxuOsSBljPM2KlDHG06xIGWM8zYqUMcbTrEgZYzzN\\nipQxxtOsSBljPM0u5owwfmuzPvLlqSz98Rcn7619aBnSN/DL+T8wcfp/nLxdzqbPpeezN3sfj42b\\nTEp6Bvv3H+D23lfQvUN4+gTmZx4/iaWrfyQQgIH33E7LplKQec48Jr71TyfzuV3p89fLAfh45ixe\\needfREdHc8/NN9Ct01lhzexnVqROgoikqGrVI/bdBOxS1Q9E5CpVfS9cefzWZn3hijVs2Lqd6WOH\\nsXbTFgaMe5npY4cV5DvlZIQAACAASURBVJ30JjOeH0FCfBz9ho6hZ8czWbz6Z1o0bsgtV/2ZLTtS\\n6DtoVFiL1MIly9mweQvTJ41j7YZfGTDqWaZPGleQedwEZrwywWmz/uAgenbpRPny5Xnx9am8/+qL\\nZGXt5YXX3rYidQKsSJUwVX0DQETKAffjdJIJi2O1WY+LjTmszTqQ32a9do2kcMU7Ou+yVfQ822nk\\n3KhubTL27CEzK4u4mBjSMnYTHxtDYqWKTt7WzZm3dBVX9vxT/tdvT06lRtXwtrSa/8MSenbp5GRu\\nUI+M3U6b9bjY2OBr7LRZB+h4ZhvmLVrCKeXL0andGcTFxBAXE8MTD/cPa2a/syJVBBH5Eaf7SwBI\\nA7qr6iIR+QyoIiKPA+cDqcClwBAgBacxaEsReQmn7foUoCFQFhiiqiXeCsZvbdaT03bRvPGpBXkr\\nxpOctou4GKc47dmbzYYt26ldvSoLVqymQ8um+c/t/eBwfkvdyaQhD4Qla37mnWk0l4JT6MSESiSn\\npjlt1g+9xpu2ULtmdRYsWUaHM1oBsDd7H7c/OpSM3bu56+/X0zFMr3EksCJVtB+AFjj98xYBHUVk\\nMVAd5y7V91R1iIh8B7QK+boxwFmqeoeIXA9sU
9Wbg22wZh3x3FLhtzbroc3VAoHA/7N35nFVVfv/\\nfg4qJqAoIGJYaaWrxAnHKzhl2mB5K9Oy8t4Gm8ix0sqxUsshZ1HMLLuVFjev1a97b2WlVk44omn1\\nqUzLIRUVxQnN9PfH2sAREfV+4bDP8fO8XryEs/fZ571X8WbtxeY8jHryMQZNmmHzVql82vm8N/Z5\\nvv/lV/qPS+GjKS/j8ZTMn956G+E8Hg+jBvZj0KhxhIWGUq1qTO72/VlZJL/0PDt27eL+3s+wYO7b\\nJZbZ39Df7p2br4C/AInAFKAZUBdYgxWBrnf22w6En+UYCcDtxphF2Mu/cs7lYJHib5r16IiKZGR6\\n582ksnOpBNC07rXMHjOUV5/vR1hICLHRldnw82Z+z7D5rr3yCv7880/2HcjySV6A6KhIMvZl5mXe\\ns5fKXpecTePrMXvqeF4dM5yw0FBiY6oQWaki8XVqU7p0KS6PvZTQkHLs23/AZ5n9HS2pc7MIW1J/\\nwQpAw7GFtRA4kW/fs/1oPA68JCJtnI+aIlLk7xHid5r1hnWZv2SlzfvzZqIjKxEWUi4v7/Nj2Lv/\\nAEeys1m0Yi0JDeqwasMPzPrgvwDsyTzAkexjVKpQ3id5ARKbNGT+om9sZvnJata9x7jfIK8xXk5C\\n43haNG1E2pp0Tp48SeaBLI4czaaSs9amnBu93DsHIvKjo0z/Q0QOGmN2ArdjbcWFcZK88U0DbgPe\\nNcZEA31FZGBRZ/U3zXrDa2sRd3V1uvZ7kaAgD0Mfv595X3xN+ZBytE9oQpcbr6P7kNF4PB4e7dKR\\nSuHl6Xrz9Qya/Br3PTOM7ON/MOTx+wkK8t3P2oZ144gzNema1JcgTxBDn+rBvP/Op3xYKO1bJdKl\\n4810f2qAzdytK5WcX1Tc0KYldz/eB4DBfZ/waWZ/RzXr54ExZg720u5xY8zDwDMiUsv7FgRjzFwg\\nGWiDXTh/Fata3wjcA0wHagOlgBdE5JPCXtPvNOunTrF54UclHeW8qXH9HQBsXrW4hJNcGJ7o6hfd\\nQpaWlEvRkipetKT8B51zKoriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRXoyWlKIqr0ZJS\\nFMXV6J/FKEXDnyc4lbGtpFNcAPaeSE8p/RZwOzqTUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0pRVFc\\njZaUoiiuRktKURRXoyWlKIqr0TvZAgy/06y/+zHrNm3FAwy8tyN1r7wsd9vsL5fx8bK1lAryEFe9\\nGgPv7ciRY8cZMPOf7D1wiHJlg3n54S5UDvediAHg5UnTWLfhOzweDwP79qBe7Wtyt33x9RJS3pxN\\ncHAZbml3Hd063877H/+X//fpF7n7bPhBWPvlf3ya2Z/xm5IyxrQBeopI55LO8n/FGLMFqCMih4ry\\nuH6nWf/hF37dtZf3Bj/Bph27GfTGXN4b/AQAh45m88YnX/HZ6P6ULlWK7mNfJ33Tb6T//CuXVY5k\\nUo9urPpxM1M++JxhD3TyXea16/h16zZSX0tm05ZfGfjSK6S+lgw4Yzx+Ch/Mmm41608NoF2rRLp0\\n7ECXjh1yn//Jl4t8ljcQ0Mu9AOJsmnXgNM16UFBQrma9JFn+/c9cH18bgKsujSbr8FEOHc0GoEzp\\nUpQpXZojx45z4s8/yT5+nPDQcvy6ay/1rqwGQONaNVjz0xafZl62ag3tWiXazNWv4MDBQxw6fBiA\\nzP0HqOBo1oOCgvhL43iWrlx92vOnvvE2Tzz4N59m9nf8ZiblEGaMeQeoD7wPfABMxeqjDgL3Y83A\\nfbBOvIbAS8BNQDzQX0Q+NMbcBTzl7LNaRPoYY+KBacAx5+Nu4E9gFlAJO1a9gG1YF1+C89hioAWQ\\njjM7MsaMBTYA84A5QCgQAvQSkRXFNTj+plnfc+AQcVfE5uUtH0rGgYOElbuEsm
XK0OO267nhmTGU\\nLVOGDs3qUSOmMrWqxfDVeuGGxnVZ8cMv7NiTWcgrFEPmvZnEmVp5mSuFk7F3n9WsV6rI4SNH2LJ1\\nG7FVY0hbk07T+Pq5+67/7geqVqlM5ciIgg6tnAV/K6nawDXYGeBmrD6qv4ikGWP6YctpIdDA2a8V\\nMBuogZV79jLGfAG8DDRwCuVjY8x1wB3ANBF52xjTFogBugCfishMY0xtYJKItDfGjAeeA8oBL4vI\\nfmNMQXljgJlOMbYFngXuLIZxKRD/06zn5T10NJtX/72QT0b2I7RcWR4c8xo//LaDO1s1Rrb9zn0v\\np9DEXElEhTCf5/TmDM36kGcZ+PIrlA8NpVrVqqe54+d+/F/u6HCj70P6Of52ubdGRI44azkeoLaI\\npDnbFmJnSwDrROQY8Dvwo4gcBnZh7cO1gJ+81oMWOc/7CBhijBkO7BaRH7CzpccdPfo08jTq/wCa\\nANeKyJxC8u4C7jTGLAZGA8WqBvY7zXrF8uw5kLcst3v/QaIds++mHbu5rHIElcqHEly6NI1q1mDj\\nlu0Ely7NC3+/g9kDk3jkljaUK1vktvrCM0dFsmffvrzMe/ZSOTLvP2vT+PrMSZnEq2NfpnxYKLFV\\nq+RuS1u7jvi6cT7NGwj4W0nl15p7E4y97Mu/n/fnHuzPNm93WTBwUkS+xBbPD8A/nNnVcewlWo4e\\nvanznNLYy7dwY0zOlMNbYJjzWF9gu4i0AJLO5wT/L/idZj2uFp+t+tbm3bKd6IrlCS1XFoDYqEr8\\n8nsG2cf/AGDDlm1cERPFV+t+YNK8+QB8vGwtreoWOIMtvszNGvPZwq9tZvnRatZD88b44aeeY+++\\nTI4cPcrCxcto3qQRALsy9hBarlyJzFD9HX+73MvPBmNMcxFZBrQGVp3Hc34EahpjyovIQed5I4wx\\nPYH/iMhsY4wHO7tKwyrVlzmXezeJyHjgaSAVe7n3FHaWlAVUNcb8gr20XAtEAeud170DW4jFhr9p\\n1uNrXkFc9VjuGTGNoCAPQ7rdxgeLVxFW7hLaN6rDQze14v7RMyhdKogGV19B41o1yD7+B3MWLOPu\\n4VMJDw1h3OP3+CRrDlazXouuj/bCExTE80/3Zt5/PqV8WBjtW7fgrr/ewkNPPosHD4/+/R4iHM16\\nxt59RFSq6NOsgYLfGIzz34JgjNmDXXOaip3FZAIPYhfLe4pIZ2NMHSBZRNrk+7wTtmhOAotFZIAx\\n5iZgBHAAu3D+IHAEeBOIxurRewMZ2EX7BOxMdAVwG3CDc0wB9gJfYxXrbwFbsQr2icBw4HnOcQuC\\n3xmMT/zBL/96taSjnDdXdu0NwJa1y0o4yQUSWe2iMxj7TUldbGhJFS9aUv6Dv61JKYpykaElpSiK\\nq9GSUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRX4+9/uxfAeDj976DdjAdK\\nB+OpUaekg5w/p+zfop86erCEg1wY/vJ/RFGiMylFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVo\\nSSmK4mq0pBRFcTVaUoqiuBq9mTPAGDn1NdK/EzweD4N6PkLda/JEll8uXk7KO6kElylDh7at6HbH\\nraSlf0vfF0ZxdfXLAah1ZXWG9H7Md3mTXyP9ux9s3l6Pnpn37Zy8LenWqSNpa9efmbfP4z7LCzBy\\n2uukf/cjHg8M6vEwda+pmZd5SRops9+3ma9rQbfbb8ndln3sGB279yap2110uul6n2b2Z7SkAogV\\n6d+yZdsOUqeOZdOvWxk4ZhKpU8cCcPLkSYZPfpV5MyZSsUJ5Hnn2hVwle5P6dZj84oCSyzttnM07\\neiKp08bl5Z00nXmvTXLyPk+7Fs3z8g4b6PO8ACvWbWDLtt9JTR5tM7+STGry6LzMU2Ywb/p4m3nA\\nMNolNiOm
chQAKe+8T3iF8iWS25/Ry70AYvmadbnFc9UVl5F18BCHDh8BIPNAFuXDQomoGE5QUBDN\\nG9Zn6er0koxbQN7Drs4LsHzNetolNgOczIcKGeP4eixdvQ6AX37bxqZft9K6WaMSy+6vBORMyhhT\\nCpgBXIkVdQ51PnqKyAbHsReFtRf3A8KwOqqrsB69E8BqEeljjHkBqAZcDlTFat0/9dJinQBWicjT\\nxpgKwBwgFCsP7SUiK4wxPwOvAh2BskA7x/lXpGTs209cratzv46oGE7GvkzCQkOIqBjO4SNH2bJt\\nB7Ex0aSlr6dpg7rExlRh069bSRo0nANZB+lx/z0kNo4v5FWKMm9mvrwVCsi7ndiYKqSt/dbJG23z\\nDhzGgaxD9HjAd3kBMjIziat1VV7m8IIy54zxBprWt3/POHr6LIb0epQP5vuHAchNBGRJAfcCv4tI\\nd2NMFLAA2HeWfeti1etlsMLPBiJyyBjzsWMxBogVkRuMMXWBtxxt+mCguYgcM8b80xiTiHXyzRSR\\nD40xbYFngTux4/yDiLxijHkPuB74sHhOPQ9vXZnH42HUc30ZNGYSYaEhVIupwqlTp6geeyk9/n4P\\nN1/Xgq07dnL/UwP57J0ZJWLa9bareTweRg14kkGjJxEWGkq1qk7eapfS4/57uPm6ljbvkwP5bHbJ\\n5IXTtdUej4dRz/Zh0CtTnMzRnOIUH85fSIPahmpeynXl/AnUkkoAWhpjWjhfl+Ps9uB1TtHEAT95\\nCTsXYS3GAF8CiMi3xphYIA47s/rMGAMQDlwBbACGGGP6YWdMh71e5xvn323O/kVOdFQEGfsyc7/e\\nvXcflSMr5X7dtEFdZk+26yfjXvsHsTFVqFI5kg5tWwJweWxVoiIqsXvPXqpVjSmOiKfnjcyfd++Z\\neaeMsXlnvElsTDRVKkfRoW0rr7wVfZY3L/N+r8z7qBwZkZe5fh1mTxppM898m9gq0XyxZDlbf9/F\\nouWr2Jmxl+AypYmpHEVCo/o+yezvBOqa1HHgJRFp43zUxFqJcyiTb1+wPxS93wkjGGs4hjPH6Tj2\\ncjDn+PEiMgfoC2wXkRZAUr7nnPD6vFjecSOxcTzzv14KwMYffyY6MoKwkJDc7Y88+zx7M/dz5Gg2\\ni5auIKFRfT7+fBGvp84D7OXX3sz9REdFFke8M/M2acj8r5Z45Y08Pe8z+fM24OPPF/L6e07evb7N\\nC/nHeJMzxuXyMj83LC/zspUkNKrPhCH9mTttLKnJY+jcoR1J3e7SgroAAnUmlYZVn79rjInGlkcW\\ndk1pA5Do/OvNj0BNY0x5Z72oNVa73g5oAYwxxtQDfsWq1K81xkSLyG5jzIvYNbAoYL1zvDs4++yt\\nWGhY51rial1F1579CfJ4GNoniXmffkH50FDat2xOl1tupHv/oXg8Hh69twuVwsO5LrEp/UaMZcGS\\nNP744wTP933CZ5dONu/VdO3RjyBPEEP7Ps68T76gfFgI7Vsm0OXWG+neb4jNe18XKlUM57rEZvQb\\n/goLliznjxMneP5J3+UFaBh3jR3jXs8SFBTE0N6PMu/TLykfFkr7Fn+hyy3t6f7sCzbzPXdSKbyC\\nz7IFKgGpWTfGlAamA7WBUsAL2NnQBOAnYBN2jWoRdjG9s/O8nMXwk8BiERngLJxfDVQAagB9ReRL\\nZ9+B2BnaWqAX0Bh4C9gKJAMTgeHA80AdZ61rLLBBRN4s7ByqX1bNvzTrwOYVC0s4yflTo2kbADYv\\n+6Jkg1wgnmrXXnTvexeQJVWUOCW1R0SSffm6WlLFi5aU/xCoa1KKogQIgbomVWSIyAslnUFRLmZ0\\nJqUoiqvRklIUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI3egqAUEafgj2Pn3s01OPdElvbpXy4p\\n/wM6k1IUxdVoSSmK4mq0pBRFcTVaUoqiu
BotKUVRXI2WlKIorkZLSlEUV6MlpSiKq9GbOQMMv9Os\\np8wi/fsfbd4nHqKuyfPwfbl0BSmz59q8bRLpdnuH3G3Zx47R8ZEnSbqvM51ubOuzvAAjk2eQvjFH\\nDf8Yda/1HuNlpLz1HsHBZejQtjXdOnUE4OPPFzLz3bmUKlWK3g91o03zpj7N7M9oSRWAMeYmoIaI\\npJxjv1uBzsBzwIsi4rvv7gLwO836uo1s2f47qZNHsunXbQwcN5XUySPz8ibPZN60V2zegSMcZbk1\\nw6TMnkt4+TDfZ07/li3btpOaMp5NW36zaviU8XmZJ6Ywb+YUm/mZobRr0ZyyZYNJfnM2/3ptMkeO\\nZjPljXe0pC4ALakCEJFPL3D/nUCJFhScXbMeFhpymgIcyNWWx8aUnLBy+dr1tEto6uStlqsst3kP\\nUj7UK298PZauWUenG9taZflv20pEWb58dTrtWjS3matfni9zQWO8lkvKBpPQKJ6wkBDCQkIY3r+3\\nz3P7M1pSBWCMeQC4FaiMNcvUB9aKyMM5FmOsbWaTs391YK6INDbG3Ic1x/wJbBSRR53jtXCOZ4BX\\nROT1os7td5r1zP35lOXhZGTud/JW4PBRr7zrNtC0XhwAo1/9B0N6PswHny/ySc7TMu/LJM7rkjQi\\nPJyMffvyxviotxp+PU3j6wFw9Ngxkga8SNbBQ/R88D6aN2rg8+z+ipZU4TQC7gZ2A9uMMRWBIcAL\\nIvKRMaagy8FQ4CYR2W+M+dopNbA69wSgJvAeUOQllR//06zny9u/F4PGTXPyOsryzxe5Sll+inyZ\\nBzzNoFETCQsLoVrVmNxz2n8gi+QRQ9ixazf3932OBf98E4/nohO//E9oSRXOz86lHMaYHVg9em1g\\nqbN9EXBzvufsAz5y9OvXAjl63WUi8qcxRjXrOXkLUpZHeOWtH8fsCSNs3tffcZTlaXnK8j17CS5T\\nhpjKkSQ09I0R+Iwx3pNPs96gLrOTX7GZZ8wiNqYK2ceOEV/nWkqXLsXlsVUJDSnHvv0HiKxU0SeZ\\n/R29BaFwTuT72uN8FKhfN8YEA1OBu0WkNdakXNCxVLMOJDaqz/xvltm8P/1yprJ84Aj2Zh6weZev\\nIqFhPSYMfpq5U8eQOmUUnW9uR9J9nX1WUOCo4Rcttpl//JnoqHxj3H/IGWr4Fk0akrZmHSdPniTz\\nQBZHjh5Vs/EFoDOpC0ewpuLPgOvybSsPnBCRncaYy5z9fPaGRX6nWY+7hriaV9K1z0Cbt9cjzPts\\ngc3bohldbm5H9+eG2bxdO7niG7thndrEmZp0feJpgoI8DO37BPM++dxmbpVAl4430b3fYDzAo/fd\\nRSVnEf2GNi24O+kpAAb3SSIoSOcH54sajAvAa+G8uog0dh5bhb3dIAKYBWwHfgHCsBr3nIXzN4E4\\nYB3wHdAdq1s3ItLPGBOG1axXLyyD/xmMT7F5yWclHeW8qZF4EwCbV35VwkkuDE/MVRfdQpaWlEvR\\nkipetKT8B51zKoriarSkFEVxNVpSiqK4Gi0pRVFcjZaUoiiuRktKURRXoyWlKIqr0ZJSFMXV6J/F\\nuBm/+it5D5QNLekQ588p588vsw+VbA7lnOhMSlEUV6MlpSiKq9GSUhTF1WhJKYriarSkFEVxNVpS\\niqK4Gi0pRVFcjZaUoiiuRm/mDDBGJr9G+nc5CvBHz9Ssv52jWW9Jt04dSVu7/kzNep/HfZd3yquk\\nf/c9HjwM6v04da81eXm/WUbK23OcvG3odudfAfh4/gJmvvu+VZZ3/xttmjfzWV6AkdPfJP2Hn2zm\\npAfyqeFXkvLuPILLlKZD60S63XZT7rbsY8fp+NjTJN17J51uaOPTzP5MQJfU+erSA4Vczfq0cVaz\\nPnoiqdPGAY4CfNJ05r02ydGsP59r4m1Svw6Thw0sgbzrHWX5REdZPp7UlI
l5eSdOZd7ryVSsUIFH\\n+g+mXcvmlC1b1irLZ07hyJGjTJn1jk9LasX679iyfSepE19i02/bGDg+hdSJL+VlnvoG86aOpmKF\\nMB4ZPJJ2CU3y1PBz/lUianh/J6BL6kJ16f7OmZr1w+ehWY8uubyr02nX0ktZfvAQhw4fJiw01Mkb\\nRkRF66Zr3ijeKsuDy5LQ2FtZ3se3mdd+S7uEJjbz5dVOH+Osg5QPs/ZlgOYN6rB07bd0uqENv/y2\\n3arhm/rGDh1IBHRJOdaXOti1t6bAJcB0EZnpWF0OAdcAUcCDIrLWGDP+LPvuwBqNLwfuE5E1xpge\\nwL1YD9+HIjLOGBMPTAOOOR93Y5Xrs4BK2DHvJSLri/p8M/Zl5tOsVyhAs56jAP/W0axHW836wGEc\\nyDpEjwd8qFnfl0lcrZpeeXO08KGOsvwIW7ZuJ7ZqFdLWrqNpA0dZnp1N0nPPk3XoED0f7EbzRr77\\nxs/I3E9czSvzModXyFPDh1fg8NFstmz/ndgqlUlbt5Gm9WoDMPq1txjSo3uJqOH9nYAuKS+2iMhT\\nxphywCZgpvN4aRFpZ4zpCAw1xtxTyL5lReRGY8zjwN+NMZlYxVULZ/sSY8z7wIPANBF52xjTFogB\\nugCfOoVXG5gEtC/uk/YWAVkF+JMMGj2JsNBQqlV1NOvVLqXH/fdw83UtrWb9yYF8NtslmvWB/Rg0\\neryT10tZnnWQ5BFD2bFrF/f3eZYF779VYsryMzTr/XowaFyKlxoePvz8KxpcW4tqJThr9WculpKK\\nMMYsBY4Dlb0e/8L5dxkwWkSyjTFn2/cb599tQDPsbKsmsNB5vDxQHfgISDHG1AJSReQHY0wCUNkY\\n083ZN095W4RYbbm3Zn3vmZr1KWMAGDfjTWJjoqlSOYoObVsBOZr1ij7WrO/Ly3uGsrwes5Ptmtq4\\nV98gtmoVso8d91KWX+pzZXl0ZCUyMr3V8Jmnq+Hr1Wb2+GE28xtziK1SmS+WrGDrzt0sSluTp4aP\\niiChYT2fZPZ3LoZbEBoBbYHWItIGewmWQ875e4BTxpjWheybX5N+HPiPiLRxPuqKyNci8iXQBPgB\\n+Icx5jpn315e+zYt+tN0FOBfLQFyNOuRpyvAn8mvWW/Ax58v5PX3HM36Xh9r1ps0Yv5XjrJcfipA\\nWT7YK28aCY3iC1CWZ/vUbJzYsD7zv1luM//0C9GRlU5Xww96mb37D3AkO5tFy1eTEF+XCYOeZO6U\\nkaROeonON7Ul6d47taAugIthJlUdWCoifxhj/gqUMsbkqM9bAv8EmmNtw1HA1rPsm5/VwGhjTAhw\\nFGspfg5rLP6PiMw2xniAeCANuB1Y5lzu3SQi44v6RK1m/Wq69uhHkCeIoX0fZ94nX1A+LIT2LRPo\\ncuuNdO83xGrL7+tCpYrhXJfYjH7DX2HBkuX8ceIEzz/pQ8163drE1apJ16QnrbL8yZ7M+2S+oyxP\\npMutN9H96YFO3rvzlOWtW3D3430B3yvLG8YZq4bvO9hm7tGdefMXUT40hPaJTely8/V0HzDCUcPf\\n7go1vL8T0AZjZ+G8PpCILZIPgQQgCygF/AFUBS4DugG/AZ+fZd+5IvJvY8ytQGcRecAY8wTwEHZh\\n/EMRGenc9jACOICdiT0IHAHeBKKdY/UWkVWFZa9+WbVTm1csLGwX11Cj6XUAbF71zTn2dA81GiUC\\nsHnxJyWc5MLwVK/vT++EWCQE+kwqGDiW7/JqAoDzG7uPROTf+Z5zxr7eOPv/2/l8GvY3ed7bPwUK\\nuvXhzgsNryhKAK9JGWOaA8+StziuKIofErAzKRFZBlxVyPYHfJdGUZT/lYCdSSmKEhhoSSmK4mq0\\npBRFcTVaUoqiuBotKUVRXI2WlKIoriZgb0FQfM0pOHa4pEOcPx7n53O58iWbQzknOpNSFMXVaEkp\\niuJq
tKQURXE1WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRmzkDDL/TrKfMIv37H23eJx7K\\npyxfQcrsuTZvm0S63d4hd1v2sWN0fORJku7rTKcb2/osL+So4X9w1PCPFaCGf9cZ49b51PBzvdTw\\nxeLiCEguqpIyxrQBeopIZ6/HJgKTRGRzMb3mRyJyW3EcOz9+p1lft5Et238ndfJINv26jYHjppI6\\neWRe3uSZzJv2is07cATtEpvlKctnzy0RZblVw+8gNWWCo4afQGrKhLzME6cx7/Upjhp+iJcafg7/\\nmjmZI0eymTLrbS2pC+Civ9wTkb7FVVDO8X1SUHB2zTpwmmY9KCgoV7Nekixfu552Cfab9aorqpF1\\n6JBX3oOUD/XKG1+PpWvWAfDLb9ussrxZI99nPosa3mbOU8MHBQXRvFEDlq5OZ9mqtSQ0bkBYSAjR\\nURE+V8P7OxfVTMohzBjzDtYi8z7Ws9cTKMOZevS+QDWsWr0q0F9EPjXGPI21FwcB/xWRF40xLwDh\\ngMG+bXFfEfnEGLNHRKK89OsnsYqt/kV9Yn6nWc/cT1ytvHd4jggPz1OWV6zA4aNH2bJtB7Ex0aSt\\n20DTenEAjH71Hwzp+XCJKMsvTA2/nqYN6gJwNPsYSc+94Kjh7/OpGt7fuRhLqjZwDbZgNgMbnccL\\n0qMDxIrIDcaY0qfFKAAAIABJREFUusBb5JlgWmAL5xdjTI5V5jIR6eBorR4HvH1Jk4HHRGS9MeYt\\nY8wVIvJrcZ0kBIBmvX8vBo2b5qUsP8WHny+iQW1DtapVfJ6vIM5Uwz/NoNETvMbYbtufleWlhn+O\\nBe//o8TU8P7GxVhSa0TkCIAj78yhID06wJcAIvKtMSbW2fcI8BXWahwF5LjBFzv/bsPOqrwxIrLe\\nOdbfi/aULP6pWfdWlu87XVleP47ZE0bYvK+/Q2yVaL5YksbW33exaPmqPGV55UgSGtYv9rw2c+Tp\\nY1ygGn6szfzqLGKrRjtq+Nolpob3dy7GNakTBT14Fj065BsjY8wVwFNYC3EbwHs2lF/F7s3J/0Pm\\n88LvNOuN6jP/m2U270+/EB0ZcbqyfOAI9mYesHmXryKhYT0mDH6auVPHkDplFJ1vbkfSfZ19VlCQ\\nM8Y5avifC1DDD3GdGt7fuRhnUgVijOnJmXp0sJd1Y4wx9bCFFAXsFpFDxpiGwBVYCem5+M4Y00xE\\n0owxrwNjReT7ojwHv9Osx11jleV9BhLk8TC01yPM+2yB1ay3aEaXm9vR/blhjrK8kyu+sa0a/mq6\\nJj3lqOF7MO+Tz61mPVcNPwiPBx697658avgnAd+r4f2dgNas5yf/LQjGmD3ABuzCeTXO1KMnAVcD\\nFYAa2IX0RcB/gTDs5V0poIHz+R4RSTbG1AGSRaSN18J5XSDFibJcRPoVltX/NOun2Lzks5KOct7U\\nSLwJgM2rvi7hJBeGp8qVF91C1kVVUheK8xu7PSKS7OvX1pIqXrSk/AedcyqK4mp0TaoQROSFks6g\\nKBc7OpNSFMXVaEkpiuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5Gb0FQiggPBF9S0iEuAOcm5uPZ\\nJRtDOSc6k1IUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6MlpSiKq9GbOQMM\\nv9OsJ88gfaPg8cCgXo9R91rvvMtIeSuV4OAydGjbim6dOgLw8ecLmfnuvyhVKojeD3XzuQ145LQ3\\nSP9eHDV8d+pek+fh+3JJmqOGL02H61qeqYZ/uA9J3e7yuRren9GS+j9gjBmLfY/0nUANEUk5x1OK\\nFb/TrOfkTRnnKMsnkZrilXfidObNnGzzPmPzli0bbJXlr03iyNFsprzxjk9LasW6DWzZvoPUKaPt\\nGI9NJnXK6LzMya8xL2WczTxgOO0Sm
xJTOQqAlNnvE16+vM+yBgpaUkWAiHx67r2Kn7Np1sNCQ07T\\nrAO5mvXYmOiSy7s6Pbcor6p+ea5m/ex513JJ2bIkNLLK8rCQEIb37+3bzGvX0y6xmc18xWVkHco3\\nxqHemeuxdM16Ot3Y1qrhfy0ZNby/E9AlZYwpA/wDq53KBh4CpgKhQAjQS0RWGGN+Bl4FOgJlgXZY\\nb96cAvbtBjyLFYAeBTYYYx4A6ohIP2PMeKApcAkwXURmGmPeBHYAjbDK9vtEZE1Rn6/fadb3ZRJn\\nvPKGh5+e96h33vU0jXeU5ceOkTTgRbIOHqbng/fSvFEDn+S1mfcTV9NbDV+BjMz8mR01fPq3NK1f\\nB4DR099kSK9H+GC+f8g13ERAlxRwP7BTRO41xnQFbgdmisiHjkr9WeBO7Dj8ICKvGGPeA64Hvsu/\\nrzGmM/Ay0BjIBFZ7v5gx5hJgi4g8ZYwpB2wCZjqby4rIjcaYx4G/A0VeUvnxO806+ZTlA55i0KhJ\\nhIWF5OYF2H/gIMkjBrNj127u7zuABf+cVWLK8jM068/0ZtDYZEcNbzN/OH+hq9Tw/kagl1RD8jTp\\n7xljwoFkY0w/7IzpsNe+3zj/5ijSdwFD8u0bCRwUkd0Axpgl3i8mItnGmAhjzFLgOFD5LMdvVnSn\\nmIffadajIk/XrO8pIG+yd94qjrL8WkdZXtXnyvLoyAgyMr3V8JlUjvDSrNevw+yJL9vMM98mNiaa\\nLxbnV8OXJiYqkoRGvjMv+zOBfgvCn5x+jn2B7SLSAiv+9Ca/Ir2gfT2crkvPr2BvDbQFWjsK9mOF\\nHL/I8TvNepN45i9anJc3Kl/e/kPPyNuiSXw+ZflRn5qNExs3YP7XOWr4TURHVjpdDT9gWF7m5StJ\\naFifCUP6MXfaK6Qmj7Zq+G53aUFdAIE+k1qJLY33jTG3AoOBJ5xtd1C4Hj0KWJ9v371AuDGmInZm\\nlQgsy/ecrSLyhzHmr0ApY8z5KNiLBL/TrNepTZy5mq5PPE1QUBBD+yY5yvJQ2rdKoEvHG+nebzAe\\n8vIC3NCmBXcnPQ3A4D6P+1RZnquG7/2cVcP3ftRRw4fQvsVf6NKhPd2fe9FmvudOV6jh/Z2ANhg7\\nBTETu3D+BzAcmA5sBZKBic5jz2MXvg953VawEXirgH09QB9gC3bhPOc3e3Wc7Z87j38IJABZWBX7\\nXBH5t1OWnUXkgcKy+5/BGDavXFSyQS6AGk1aA7B56eclnOTC8FxW+6IzGAd0SfkzWlLFi5aU/xDo\\na1KKovg5WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVoSSmK4moC/c9i/JsS+sv+\\n/4lTJyH78Ln3cws5Y1s2pPD9lBJHZ1KKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdVo\\nSSmK4mq0pBRFcTVaUoqiuBq94zzAGJk8g/SNgscDg3o9Rt1ra+Vu+3LxMlLeSiU4uAwd2raiW6eO\\nAHz8+UJmvvsvSpUKovdD3XyqLR85/U3Sf/gJDx4GJT1AXS9Z6JdLV5Ly7jyCy5SmQ+tEut12E0ez\\njzFg7FT27D/A8eN/kHTvnVz3F99agUdOnk76dz/YMe6dRN1rTV7mb5aS8ta7BJcpQ4frW9PtztsA\\n+Hj+AmbO+SelSpWid/e/0yahWKxmAYmW1FkwxtyLFTQ8LCLfeD0+EZgkIptLLNxZWJH+LVu27SA1\\nZRybtvzGwNGTSE0ZB8DJkycZPnE682ZOpmKF8jzyzPO0a9GcsmWDSX5zDv96bRJHjmYz5Y13fFZS\\nK9Z/x5btO0md+BKbftvGwPEppE58KS/v1DeYN3U0FSuE8cjgkbRLaMKajUKdWlfx8F23sX1XBg8N\\nGOHTklqxdj1btm0ndfpEO8ajxpM6fWJe5olTmTdzKhXDK/BIv8G0a5
lA2bJlSZ71Dv96PZkjR44y\\n5Y23taQuAC2ps9MOeNa7oABEpG8J5Tkny1en065FcwCuqn45WYcOcejwEcJCQ8g8kEX5sFAiHC1U\\n84b1Wbp6LZeULUtCowaEhYQQFhLC8P69fZd37be0S2hi815ejayDh/PyZh2kfFgIERWtEqp5gzos\\nXfstnW5ok/v8nRl7iYmKKOjQxZd59VratUywmatfTtbBgxw6fJiw0FBnjMOIcESlzRs1YOmqtVxS\\nNpiExvF5Y/yMa/8XciVaUoAxpgIwBwgFQoB5QAegiTEmE6vFWgPMB/4G9MSaiGcDFYADQFegIvC2\\nc9gywP0isskY8zNWcZUI7AduERFvyWiRkLEvkzivy6WI8HAy9mUSFhpCRMVwDh89ypZt24mNqULa\\n2vU0ja8LwNFjx0ga8CJZBw/T88F7ad6oQVFHKzhv5n7ial7plbcCGZn7bd7wChw+ms2W7b8TW6Uy\\naes20rRe7dx9u/YdzK49e5k+7DmfZM3NvC+TOFMzL3PFcDL2ZhIWan8AHD5ylC1btxNbtQppa9fR\\nNL4eAEezj5H03PNkHTxIzwf/RvPG8T7N7c9oSVligJki8qExpi3QA+vTmysiXxljrgRuF5GNxpi/\\nOc/pB3wmIpONMU9iZ17bgWEistAY8xBWRPo0cCXwloj0M8YsB+oB6cV9UqfI05V5PB5GDXiKQaMm\\nERYWQrWqVcjRme0/cJDkEYPZsWs39/cdwIJ/zsJTAu/AcEbefj0YNC6FsNAQqsVE4y1fe2/iCL7f\\ntIX+Y6bwUcorJZIXwNsI5/F4GDWwH4NGjSMsNJRqVWNyt+/PyiL5pefZsWsX9/d+hgVz3y6xzP6G\\nlpRlFzDEGNMPKIu1Ex/02n5YRDbme05DYAiAiEwAMMZcBkw2xrwIVAJWO/tmiUiODXkbEF4cJxEd\\nFUnGvv25X+/es5fKkZVyv27aoC6zk8cAMG7Gm8TGVCH72HHi61xL6dKluDy2KqEh5di3/wCRziVL\\ncRIdWYmMTK+8ezOpHOGVt15tZo8fZvO+MYfYKpXZ8NMvRIZXoGp0FNdeVZ0///yTfQeyiKxYLEN6\\nZuaoSDL2ZeZl3rOXyl6XnE3j6zF76nibefobzhgfI75ObWeML/XpGAcCeguCpS+wXURaAEkFbD9e\\nwGN/cub4DcPOrloBL3o9fiLffsXyIzSxSTzzFy0GYOOPPxMdFUlYSN77JT3Sfyh7M/dz5Gg2i5au\\nIKFRA1o0iSdtzTpOnjxJ5oEsjhw96jM1eGLD+sz/ZrnN+9MvREdWIiykXF7eQS+zd/8BjmRns2j5\\nahLi67Lq2++Y9a9/A7DHOZdKFcr7JC9AYpOGzF9klyk3yk9njnG/QV5jvJyExvG0aNqItDXpXmOc\\nrfr1C0BnUpYoIGemcwcQfB7PWQm0BVYaYx4Dsp3jbDLGeIDbsHp1n9GwTm3izNV0feJpgoKCGNo3\\niXmffE750FDat0qgS8cb6d5vMB48PHpfFyo5s48b2rTg7qSnARjc53GCgnzzs6thnCGu5pV07TuY\\noCAPQ3t0Z978RZQPDaF9YlO63Hw93QeMwOPx8GjX26kUXoGut9zAoPEp3PfUULKPH2dIz+4+ywvQ\\nsG4ccaYmXZP6EuQJYuhTPZj33/mUDwulfatEunS8me5PDbCZu3X1GuOW3P14HwAG933Cp5n9HdWs\\nA8aYJsBbwFYgGZiILZgeIvJvY8weEYly9l2EXTjf6jwnHHtpeC/QGhgLbAGmADOAB4E5Xs+fCySL\\nyKLCMlW/rNopf9GW12jSBk6dZPPiT0o6ynlTo9UtAGxetbiEk1wYnujqF91ClpaUS9GSKl60pPwH\\nnXMqiuJqtKQURXE1WlKKorgaLSlFUVyNlpSiKK5GS0pRFFejJaUoiqvRklIUxdXon8UoRUOp0ngq\\n
X17SKc6fo4cBOPn5uyUc5MIodd+Ako7gc3QmpSiKq9GSUhTF1WhJKYriarSkFEVxNVpSiqK4Gi0p\\nRVFcjZaUoiiuRktKURRXozdzBhj+pln35sefN/HEU8/wwL1d6da1y2nblqatYHzydEoFBdGqRQI9\\nHnmoRDICjPpsBeu2ZeDxwIAbm1E3Nip325yV3/Px+l8oFeQh7tJIBtyYZyrec+got077gMl3XUfT\\n6lVLIrpfojOpQjDGvGmMubWIj9nTGPNCUR4zB2/N+kvP9GHE5Fdzt+Vo1meMeZF3Jo9m4dIV7Ny9\\nh8wDWSS/OYfZyWOYPuoFvly8vDiinZMjR48yfMw4mjdpXOD2EWPGM+WVkbw7awZLlqXx8y8lY7lf\\nuWUnv+7L4t3utzC8YyIvf5qWu+3QseO8sXQDbz94M+882IFNGQdYt2137vaxX6yiWiXfmW0CBS2p\\nAOJsmnXgNM16UFBQrmZ92er0XM16dGSETzXr3gSXKcNrk8cTXTnqjG1bt20nPLwCVWOqEBQUROsW\\nCSxbsbIEUsLyzb9zvbF//nNV5YpkZR/j0DFrPCtTKogypUpx5PgfnDh5kuw/ThBermzu80KDy1Ar\\nWl17F0rAXe4ZYx4AbsLqz6sBE7DevF5YV95GEXnU2e9m4FKsIv1poClwCTBdRGZ6HTMNuNdRplcD\\nPsKq1+92dqmJtcyMwRpirsRq1oeKyAJjzPVYA81O4Hfgl+I4d3/TrHtTunRpSpcu+H/HjL17iaiU\\nJw2NiKjE1q3bfRXtNPYcPkrtSyNzv64Ucgl7Dh0lrGwwZUuX5onW9blh8r+4pExpbo6rQfXIcI7/\\n+SfTvkonuWtbRn22okRy+zOBOpOKA/6K9eKNAEKBm0QkEbjGGFPX2e9yoBWwF9jiyEFbYiWf3rxN\\nXiH9FXhXRFJEpA3QDdgNpGC1Vr+LyHXA7dhiAhgJdBOR9lg3n084m2a95+ARuZr1U6dOsf/AQaYM\\nH8zIAU8ycNREXG8Qcmm+Q8eOM2Pxt3zSsxPze9/Jt9sz+GHnPmYu/pbODWtR4ZKyJR3RLwm4mZTD\\nVyJyAthjjMkE9gMfGWMArgVyfhSuFJFTQLYxJsIYsxQ766qc73jvAp8BLwO3Ao8AGGOCgH8AvUVk\\nvzEmAWhpjGnhPK+cMSYYqC4i63KyAeUoBvxNs36+RFeOYs+evblf79qdUeBloS+oHFaOPYeO5n69\\n++ARKodZg/GmjANcVjGMSiGXANDw8ips/H0vSzbt4M9TJ5mz8nu2Zh5k/fY9TOjchprRlQp8DeV0\\nAnUm5X1epbAlc7eItAbSvLYdBzDGtMbOulo7s6Nj3gcTkb3ANkciGiQiOdcaA4AlIvKN1/FeEpE2\\nzkdNETkOnDxLtiLF3zTr50u1Sy/l0OHDbNuxgxMnTrDwmyUkNm927icWA4lXxTL/uy0AfPf7XqLL\\nhxBatgwAsRXD2LTnANl/nABg4449XBFRgdkPdeC97rfyXvdbaV2zGkM7/EUL6gII1JlUc2NMKaAS\\ndl1qt4jsNMZcBjTmTI16FLBVRP4wxvwVKOXMgLx5G5iKXXPCGNMMuAFbbjmkYfXq7xpjooG+IjIQ\\n2G7sNO5HoA2wrOhONQ9/06x7s+G7Hxg9YRLbd/xO6dKl+ezLBbRt3ZJql15K+7ZteGHAMzw9YCgA\\nHW5oR40rSua9q+IviyauaiT3vvEfgjweBt/8Fz5I/4nylwTT7poreCihDg+89Smlg4JoUC2axldU\\nKZGcgUTAGYydBfHbgFPA1cArwPXYdap1wHdAd+x6kRGRfsaYcOBz4CjwIZAAZGFnYXMd1XowduH7\\nSufS7jNsAWY4L70YeAGYDtR2nvuCiHxijLnJyfGrc4xtIvJCYefhdwbjoCC2fLfunPu6herVrwRg\\n08RnSjjJhVHqvgEXncE4UGdSm
0Skn9fXb+fbPt77CxE5gP3NXg4TCjhmIvCxiOx3nnPjWV774fwP\\niMinwKfnCq0oypkEakkVKcaYF4EbgTtLOouiXGwEXEmJyJvFcMzngeeL+riKopybQP3tnqIoAYKW\\nlKIorkZLSlEUV6MlpSiKq9GSUhTF1WhJKYriagLuFoSAwuNHP0NO/smpfb+XdIrz55JQAIKuv6uE\\ngyjnwo++CxRFuRjRklIUxdVoSSmK4mq0pBRFcTVaUoqiuBotKUVRXI2WlKIorkZLSlEUV6M3cwYY\\nI6e8Svp3P+DBw6Dej1H3WpO77ctvlpHy9rsElylDh7at6XbnXwH4eP4CZr47l1KlStG7+998qlkf\\nOe0N0r8XPB4Pg57oTt1raublXZJGyuy5BJcpTYfrWtLt9g6527KPHaPjw31I6nYXnW5sW9Chiy9z\\n8gzSN/5gMxeosn/PUdm3zqeyd8a4BFX2/shFP5MyxrQxxswt4PGJxpgaJZHpf2VF+npHsz6Bl57t\\ny4jJ03O3Wc36NGaMGcY7U15h4dI0du7OyNOsTx3L9FEv8uXiYnFEFJx33Qa2bN9B6pTRvPR0D0ZM\\nzfWx2rzJrzHj5cG8M+ElFi5byc6MPbnbU2a/T3h53yvLrcp+O6kp4x2Vff4xTrFjPHmMM8Y5KvvZ\\nzE5+pURV9v6KzqTOgoj0LekMF8ry1em0a+mlWT94iEOHDxMWGupo1sOIqGh9es0bNWDp6nQuCQ4m\\nobHVrIeFhDC8fx/f5V27nnaJVk111RWXkXXoMIcOHyEsNMTmDbVaeIDmDeuxdM16Ot3Yll9+28am\\nX7fRulkjn2XNzXwWlX1u5jDvzFZlf0nZYBIaxXuNccmo7P2Vi66kjDFlsELPK4Bs4A0gzBjzDlAf\\neF9EhhljFgE9gc5AOGCAq7Caqk+MMZ2wavYTwCoRedoYcznwDlbnXhprN95GAer14ji3jH2ZxNXK\\nu1yKqJijWQ91NOtH2LJ1O7FVHc16A0eznn2MpOdeIOvQIXo+eB/NG8UXR7wC8u4nruZVeXnDK5CR\\nmV8Lv4PYmGjS0r+laf06AIye/iZDej3CB/MX+iTn6ZkLUtnvK0RlXw/wVtnnjLHvVfb+ykVXUsD9\\nwE4RudcY0xXr5qsNXIO9/N3MmZr1y0Skg6OmetwY8w0wGGguIseMMf80xiQCzYDPRWS4MaYhUBWr\\ncf9dRLobY6KABUA9X5yot67M4/EwauDTDBo9gbDQUEezbrftz8oiecRQduzaxf19nmPB+//A4/G9\\nOemMvM/0ZtDYZMJCQ6gWY7XwH85fSIPahmpV3eGzO1Nl/zSDRk0kLCyEalVjcs9p/4EskkcMYceu\\n3dzf9zkW/PPNEhljf+RiLKmGwJcAIvKeMaYNsEZEjgAYYwr6P2ex8+827KwqDrgc+MxRt4djZ2bz\\ngQ+MMRWxvr5lxpj7KUC97piNi5ToyEgy9mXmfr17zz4qR0bkft20QT1mJ48FYNyrs4itGu1o1ms7\\nmvVLfapZj46MICPTSwu/N5PKEV5569dh9sSXbd6ZbxMbE80Xi9PY+vsuFi1fxc49ewkuU5qYqEgS\\nGtUv9rwA0VER5xjjusxOfsVmnjHLUdkfc73K3s1cjAvnf3LmeZ84x3O8t3uwOvXVXjr1eBGZIyIb\\nsJeM3wAjjTF/5+zq9SInsUlD5n/laNblZ6KjIvJp1od4adbTSGgUT4smDfNp1rN9pllPbNyA+V/b\\nhfqNP20iOrISYSHl8vIOGJaXd/lKEhrWZ8KQfsyd9gqpyaPpfHM7krrd5bOCAmeMT1PZFzbGOSr7\\n/GPsPpW9m7kYZ1IrsWr0940xt/K/XXoJcK0xJlpEdjtevhlAS+AXEfnQGLMHuAtYTsHq9SKnYd3a\\nxNW6mq5JTxEU5GHokz0czXoI7Vsl0uXWm+j+9CA8Hnj0vrvyNO
utW3D3408CMLhPks806w3jriGu\\n5pV07f0cQR4PQ3s/yrzPFti8Lf5Clw7t6f7ci1YLf8+drvjGtir7mo7K3sPQvk/kU9nf5Kjs841x\\nmxbcnfQU4NsxDgQCTrN+Lhxd+kzs5dkfwCzgDhHp7GzfIyJR+RbO94hIsjGmDpAsIm2chfOBwDFg\\nLdALiMdq1g9hZ2y9gZ8oQL1+rpzVL6t2avOqr4vuxIuRGo1bAafYvPTzko5y3tRIuAGAzSu/KuEk\\nF4Yn5qqLbiHroispf0FLqnjRkvIfdM6pKIqr0ZJSFMXVaEkpiuJqtKQURXE1WlKKorgaLSlFUVyN\\nlpSiKK5GS0pRFFdzMf5ZjFIseKCUH/7vdPLPkk6gnAOdSSmK4mq0pBRFcTVaUoqiuBo/XERQFMXt\\nOO8Y8hEwQUSS821rB7yMfaeQ/4rI8MKOpTMpRVGKFGNMKDAF5x1wC2AycCeQCNxgjKld2PG0pBRF\\nKWqOAR2AHfk3GGOuBPaJyFYROQn8F7i+sIPp5Z6iBBJHDhT/G8SFhBf6nlYicgI44bz/f35igAyv\\nr3djLUxnRWdSiqKUJOd8Ez+dSQUYfqdZn/oa6d85mvWej1D3Gm9l+XJS3kl18rai2x23kpb+LX1f\\nGMXV1S8HoNaV1RnS+zGf5XV9Zve/0+4O7Gwqh1gKuCz0RkuqCDDGvAc8KCJHSzKHt2Z905bfGDh6\\nAqkpE4A8zfq816dQsUIFHuk/hHYtm1O2bFmS35zDv2ZO5siRbKbMettnJWWV5TtInTqWTb9uZeCY\\nSaROHZuXd/KrzJsxkYoVyvPIsy/QrsVfAGhSvw6TXxzgk4yBkNlNiMgWY0wFY0x1rCLuVuC+wp6j\\nJVUEiEjXks4AfqhZX7Mu95v4qisuc/IWpixPJzamZKWg7s9c8jMpY0wjYBxQHfjDGNMZ+H/AZhH5\\nAEgC3nV2TxWRHws7npZUIRhjKgBzgFAgBGuEuQ7oBJwEPhaRl40xW4A62AXAqVgLzUmgC1ABq3Xf\\nhHXyrRWRh4sjr19q1mt5Kctz8zrK8iPemnWbNzamCpt+3UrSoOEcyDpIj/vvIbGxb/L6a2ZfIyKr\\ngTaFbP8aaH6+x9OSKpwYYKbj0WsLPIvVplfF3oj2eL79o4FeIrLWGDMMO439GGgE3I39TcY2Y0xF\\nEdlPMeP3mvXn+jJozKTTNOvVYy+lx9/v4ebrWrB1x07uf2ogn70zg+AyZXye15WZ3b8mdcFoSRXO\\nLmCIMaYfUBY4DMwFvsDOsGYXsP9oY0wIcKnX9p9FZCeAMWYHVste5CXld5r1/MryvfuoHFnJK29d\\nZk8ebfO+9g9iY6pQpXIkHdq2BODy2KpERVRi9569VKsagy/wx8z+jt6CUDh9ge0i0gJ7HY2IJGFn\\nUDHAImOMd9FPAiaJSGvgVa/H82vci2Wa4n+a9Xjmf73U5v3xZ6Ij8+V99vl8yvL6fPz5Il5PnQfY\\ny9u9mfuJjor0SV6/yHzqVPF/+BidSRVOFLDe+fwOINwYM1REhgHDjDGtsGtO3vtvMsaUxd5xu9yX\\nYf1Os17nWuJqXUXXnv2tZr1PEvM+/cIqy1s2p8stN9K9/1A8Hg+P3tuFSuHhXJfYlH4jxrJgSRp/\\n/HGC5/s+4dNLPX/M7O+owbgQjDFNgLeArUAyMBGoDHyPVakvFZHBXgvn9wJ9sIvks5zndABeF5HG\\nzjFXAZ1FZEthr+1/BmPYnLaghJOcPzWatQX8KzOA59Jahc/CD+4p/m/o8lE+XbDUmVQhiMhK4Fqv\\nh/7fWfar7nw6w/nI4QPn38Ze+zZGUZTzRktKUQKJALwy0oVzRVFcjc6kFCWQ0JmUoiiKb9GZlKIE\\nFDqTUhRF8Sk6k1KUQCLwJlI6k1IUxd3oTEopIk75p7LcH9XwhaG/3VMURfEtAfZjRFEudnQmpSi
K\\n4lN0JqUogYSuSSmKovgWnUkpSiChMylFURTfojMpRQkoAm8mpSUVYPidZn3a66R/9yMeDwzq8TB1\\nr8nzBn5ZhEcEAAAgAElEQVS5JI2U2e/bvNe1oNvtt+Ruyz52jI7de5PU7S463XS9z/KC/42xv6OX\\nexeIMWaiMaZGSecoCG/N+kvP9mXE5Om523I06zPGDOOdKa+wcGkaO3dnkHkgi+Q35zB76limj3qR\\nLxcv813edRvYsu13UpNH81K/noxInnl63ikzmPHyEN6Z8BILl61kZ8ae3O0p77xPeIXyPsuam9nt\\nY6y2GEVE+pZ0hrPhf5r19bRLbGbzXnEZWYcKUZbH12Pp6nV0uul6fvltG5t+3UrrZo18ljU3s8vH\\n2BdiFV9rYwO2pM6iSH8XeA3oDPwMrMaq0H8SkfuMMZcCrwPBWEPxwyLymzHmJ2ANMB/4G9AT2IaV\\nf1YADgBdgYrA206EMsD9IrLJGPMz8CGQiJWC3iIiJ4v6nP1Os56ZSVytq/LyhlcoRFm+gab16wAw\\nevoshvR6lA/m+9704m9jHAgE8uVejiL9OmAAVpFeCls2TbCFsUVEmgItjTEVgeHAOBG5HquvGuIc\\n60pgmIi87nX8fsBnItIS+BJoh9WvD3Ne8w3gCa/nvyUizYFKQL1iOufTOJtmveeg4bma9VOnrGZ9\\nyoghjBzwFANHTvDJT+MC83p97vF4GPVsHwa9MoWeQ0dRrWo0pzjFh/MX0qC2oVrVKiWSMT/uG+NT\\nPvjwLQE7k6JgRTrAChE5ZYzZBax1HtuNVZ8nAMYYMxhbaBnO9sMisjHf8RvilJiITMA+8TJgsjHm\\nRWwZrXb2zRKRHMnoNue1ihy/06xHRpCxL882b5XlXnnr12H2pJE278y3ia0SzRdLlrP1910sWr6K\\nnRl7CS5TmpjKUSQ0ql/seW1m/xrjQCCQZ1JnKNIdTpzlcw9wHOgiIm1EpKWIdHK2HS/g+H9y5vgN\\nw86uWgEvnuV1cl6ryPFvzfomR1leLi/vc8Py8i5bSUKj+kwY0p+508aSmjyGzh3akdTtLp8VFPjB\\nGOvCuV+RX5EefB7PSQNuB1KMMW2BGBGZc5Z9VwJtgZXGmMeAbPI06x7gNuxszGf4nWY97hqrLO/1\\nLEFBQQzt/SjzPv2S8mGhtG/xF7rc0p7uz75gleX33Omz8iw0s5+NcSAQsJr1syjSSwFxInLIW3ee\\n8zl2xjQLKIe9+H5ARDYbY/aISJRz3EXYhfOtzvHDgYNYxXprYCywBZiCtRk/CMzxev5cIFlEFhWW\\n3/806/+fvfMOj6Jq+/C9oUkKqYQShFAfIBRDibRQJOIrlvdVQUGwC0oTEFBMpEgvglQBERQFBUXw\\ns4MiiFIFQhDQoyIdhTSSQACV5PvjTJJNCBA0u5ss576uXNnZmZ397YGcfebMzLkzObjlK1dHKTDV\\nW94KQHFp4yxsFWpcsQrPPHnQ4X/QtgrVnXqCz207qeKO6aQci+mk/k0G53ZS7ny4ZzBch7hf0WEO\\njA0GQ5HGVFIGgzvhhsM3ppIyGAxFGlNJGQxuhamkDAaDwamYSspgcCfMmJTBYDA4F1NJFWmcPXPP\\nv6Q4adazmtbmZt/TppIyGAwG52IqKYPBrTCVlMFgMDgVU0kZDO6E+xVSppIyGAxFG1NJGQzuhDm7\\nZzAYDM7FVFIGg1vhfpWU6aTcDK0A/9FSgD+djwL8HUsB3j6PAvx9OwX4zc7LO+8Ndv/4MzabjZi+\\nj9NQauXk3bydectW6rztW9Pzf52z152/cIG7eg2mT48u3HvbLU7LCzBx1nytWbdBzDN98rTxZua9\\nZWnWO7aj533/Baw2fuc9q40fpn0r57Vxcceph3si8qCIKBGJzPN8kVWX50VEKorIAlfnyA+tAD/O\\ninkzGP/8YMbNmpe9TivA5
/LalLEsnf0y6zdvtVOAL2PZ3GmWAnyr8/LG7ePQ8d9ZMWsi45/ty7i5\\nOVrDjIwMxs55ndfGx7B0+ljWWwqrLOYtW4mvj7fTsmZnjrXaeP4Mxj//LONmXqaN57zM+k12mvU3\\nlrLs1enMnzzGaNavEWdXUlHA80qpb+2fLMrq8rwopf4AnnJ1jvy4NgV4OJt3xnJD6TK0ahbuGs16\\n7B6iWkXovNWq5NGsp+HjlUezviuOe2+7RWvWjxxzkWY9lqjIVjpzaFVS09IubWN/O836jlhuKFM6\\ndxs/V2z+uxcJHNJJXUZx7gt0BpqLSDLwOg5Ul4tIFNpI/CeQDNyPln8ORHvwmgDjgf8A4cAwpdSH\\nInIvMMTaZodSaoiIPArcDlQGhgMzlVLNRORWYALawbdcKTVDRHpYn/cisE8p1dt6fRugPCDA1Dw2\\n5ELh2hTgcUTcpEXK586fp8/wUZYCvKcTNeun82jWfYlPPm1p1stx9pydZj1uLxGNwgCYvGAJI/o/\\nyeovNzglZ67MScmESZ42TrRr4/Rzuds4PKuNL+g2Tkuj/2MP0bKZg9rYnN0rMJcozpVSXwJfAC8o\\npb7B8epyf+BBpVQ7IBW4zXr+JqAn8DQwCa2cehp4VES8gReBW6zX3Sgira3XVQXaAscBLLfeq+iO\\ntzUQJSJl0R3zf5RSrYG6ItLQen1D4F6012/ANbXmP+RSBfhQYiZPp3/MGKpUqkhmZiaZmZmcTk1j\\n9riRTHxhCNETp7tOs54377ABxEx7lf6jp1CloqVZ/3JDEdOs5zzObuNJ0+gf/ZLVxvaa9ZFMjB5K\\n9MRpLmvj4oijDvcupzi3x9Hq8njgdREpie7Qvkb78eKUUhdE5HfgZ6XUWUu57guEoTujNSKC9Vw1\\na3/fW3r2rP2XB84rpbJU7HdaeZOA/7O2qwcEWuu3KKUuiogDNesBxCclZS/nrwCfBsC0BYsJqVTB\\nUoDXKzqa9QD/nLyNw1j2yjidd9FSS7O+LUeznpBI6VKlqFg+kFZNnKRZD8qrWU+kfJBdG4c3Ytnc\\n6Trz/MWEVKzA+QsXnKhZd7/Oz1GV1OUU5/Y4Wl2+GOhvVUT/d5nX5adZ32lp1tsrpcLtDMZ5816S\\nVURKA3OBB6z33XaF9yp0WjdvaqcA/yUfBfiLRUuz3rQxa7/Vg8j7fvntUs169DgSk1N03q07aNWk\\nEa+8OISVc6ewYvYkutweRZ8eXZzWQYGlWd+gh1R1GwfmbuOhMXZtvJVWzcJpE9GUbbt2u6SN3QFH\\nVVL/RHEOhasu9wWOiIgf0MEuz5VQQD0RCVZKnbKqt9fy3VCpRBEpISIhwAngY+AR4G+l1B9WBdiM\\ngn/2f41WgNemW5/BlgK8P6s+X4uPl5edAjxaa8t7PJBHAa4Hc52uWa9dg24Do/Gw2Rg5oBer1nyt\\n87a5ma63R/HE8DE6b7d7i8QfdpOGYYRJbbr1GYSHzYORz/Zj1WdrtRq+bWu63nU7Tzz7gs7cs1tO\\nG7eP5IGn9UmJFwf1dVwbu+FhpKM6qbeAt0SkK1px3l1EHivA62Zar9tAjro8Ea0sP2T9fk1EOhVg\\nX3OBTcDPwBRgNBB9pRcopdJFZBDwmYhcAGLRHdDl6AustB6/Z3VcX4rI90Cc9b6voBXvTmHI04/n\\nWq5bq0b2407t2tCpXZtLXtPtv3fQ7b93ODxbfgx58qFcy3VrhmY/7hTZgk6RLS772gEPP+CoWFdk\\nyNNP5FquWytn8L8otnFxx2jWiyhas/7t1TcsAlRvFglkcnDTGldHKTDV2/wHgIM7vnNxkmvDFhx6\\nZc36oTjHa9ZDGzt1ylhz757BYCjSmNtiDAZ3wg2PjEwlZTAYijSmkjIY3ImMjKtvU8wwlZTBYCjS\\nmErKYHAnzJiUwWAwOBdTSRkM7kSmGZMyGAwGp2IqKUMhYY
NSZVwdouBkjd38fcG1OQobMyZlMBgM\\nzsVUUgaDO2HGpAwGg8G5mErKYHAjnDGriVOnQMBUUgaDoYhjKimDwZ0w9+4ZDAaDczGVlMHgTrjh\\n2T3TSbkZE2cvYPf+H7FhI+aZp2lYL1vBxbpvtzDv7XcoXaoUnW9pT8/77gbg47Vf8/q771OiRAme\\neeIh2re82Xl55yxk9/6fsNlsxAzoTcO6dXLyfreVeW+vsPJG0vPeu9gWu4dBoydRK7QqAHVqhDJi\\n4NNOywswce7r7N6vdOb+vWhYN0cWuu67rcxb+l5O5nvuZNvuHxg0erJd5mqMeMZBEmw3vJjTdFLX\\niGUjTlFKrXZ1lrxs372HQ8eOs2LeDA4cOkL05OmsmKcdEBkZGYydMZdVi+bgV64cvYa9SFRkS8qU\\nKcOcN5fxweuzSU8/x+w3ljqtk9q++wcOHTvBilenceDwUaInz2DFq9Ny8s6cz6qFM/Er50Ov50cR\\n1UYr5Js3bsCsMVd0ajgw816dee5UnXnKLFbMnZqTedZrrHrtFSvzS0S1aZGT+aXhLslc3DGd1DWi\\nlHrT1Rkux9adu4mK1H/INUOrkpp2hjNnz+Lt5UVySio+3t4E+GkhZcum4WzeGcsNpcvQqlk43p6e\\neHt6MnbYQOfl3RWX/Udcs9qNpKad5czZdLy9PK28Wl0O0LJJYzbv3E1IxWCn5cuPSzOfuULmRmze\\nGefczOZwr/hiVUDt0B6/MCAG6A7UB3qgHXkPAhnAh0qpaSKyCPhcKbVSRF4HvgLqAglKqTkiMhO4\\nGS3+fBr4CVgCVEHr1kcrpT6xFF1fop2CQcBdSqkjhf0Z45OSCauTc+gR4OdLfFIy3l76D+fsuXQO\\nHT1OSKUKbIuNI+ImbaY/d/48fYaPIvXMGfo/1pOWTcMLO9oV8tayy1vOyuup86af49Cx44RUrMC2\\n2B+IuKkhIRWDOXD4KH2ix5CSeoZ+j3andTPn5M3JnKOwymlj+8wnCKkYzLbdeTLHjCMlNY1+j3Rz\\naubiznXTSVnUBiKBJ4EXgHDgUbSPrxyQJUzbJCLvA8+hHXxHgBCl1HIRGQ0gIlHAjUqpFiLSFngA\\n7QVcq5RaIiI1gPeBT6x9piqlOorIJOBenODis7+wz2azMSl6KDGTp+Pt5UWVShWz159OTWPOuJGc\\nOHmSRwY+z9fvv4XN5uxL9nIPp9hsNia9MJiYyTOtvBXIzMwktEpl+j3Snds7RHL0xB88MjiaNcte\\no3SpUk7PqzPnaePhg4iZMgtvL0+qVLQyh1Sm38PduL1DG5352RjWLF3gmMxuOCZ1vV2CsEMplQn8\\nDuxRSl0ETgKN0B3YeuvHBwhVSiWiDcYfAwPy7KsJWj6KUmqjUmoEkAw0F5FN6Ioq0G77LIneMbRd\\nudAJDgwgPikpe/lUQhLlAwOylyNuasSyOdNYMHkM3l6ehFSqQGCAP+EN6lGyZAmqhlTGy7MsSadT\\nHBHvMnmTc/ImJlI+0N8ub0OWzZ7CgkmjdN6KwVQoH0TnW9pis9moGlKJoAA/TiUkOiUvQHBQAPFJ\\np+0yJ+XJ3IBlsyaxYOJIvL2zMgfS+ZZIu8z+Ts1c3LneOqm/L/M4APhUKdXe+mmolNporasInAHy\\nDixc5NL2e9DaVyRaL3+593ZImdK6eVPWfqNll/vULwQHBeDt6Zm9vtewF0lMPk36ufNs2LyNVk3D\\nadO8Cdt2xZGRkUFySirp5847TWfeunkT1n6zSef9+VeCAwNz531ulF3e7bRqehMff7meRctXARCf\\nmExi8mmCgwLz3b9DMjcLZ+3GrMwHCA7M08bPj7bL/L2VeQOLVujzLPFJDs6cmeH4HydzvR3uXY6d\\nQAcR8QTOoQ/FhqM7qE5AR2CFiLS2e8331jZTRSQcfQj5G3BQKZUhIvcCpZ34GWjSsD5hdWr
Trc9g\\nPDxsjBzcn1Wfr8XHy4tb27am653/4Ykh0dhsNnr3eAB/a4C3U7s2PPD0IABeHNgHDw/nfHc1aVCP\\nsDq16NZvKB42D0YOeppVn3+Fj7cnt0a2ouudt/HE0BFW3q74+/nSofXNDB07la83beWvv/9m1OC+\\nTj3Uy87c/zk8bDZGDnyaVV+sw8fLk1sjW9L1jk48MWwUNhv0frAL/r7l6NA6gqHjpvH1pm389dff\\njBrUx2WHp8WR60azbg2cN1BKDRWRO4EuSqlHsx4D24HH0RXSh0qpiSLyCTBBKbVZRMYAaegB8ayB\\n82lAhPUWfa31HwHxwGJgIHpM6hagv1Jqr4j0B4KUUqOvlLf4adbh4Pb1Lk5ScKpHtAfg4LavXRvk\\nGrFVlitW4RmxXzn8D9ojPMqpA5bXTSdV3DCdlGMxndQ/x9mdlDncMxjcCTe8Tup6Gzg3GAzFDFNJ\\nGQzuhBsO35hOymAwFCoi8grQAsgEBiqlvrdb91/gReACsFwpNedq+zOHewaDO+Hi66REpB1QWynV\\nEngCmGW3zgOYA3QG2gJ3iUiVq30k00kZDIbCpCPwIYBS6kfAX0Syrg4OAk4rpeKVUhnAOiDqajs0\\nh3sGgzvh+jGpiuiLo7OIt55LtR77iEht4BDQAdhwtR2aSspgMDiS7GuqrPtmH0Ff6LwaOEgBbhEz\\nlVRRxQY46faUf40NuHiRzORTrk5yDVh/GyWceueS43H9dVIn0JVTFpXRN/QDoJT6Bn1vKyIyEV1R\\nXZFi8ldgMBiKCWvRt5khIk2AE0qptKyVIvK5iASLiBdwF3qOtitiKimDwZ3IcO2YlHWf604R2Yye\\nQLJfnim3F6I7skxgolIq4Wr7NJ2UwWAoVJRSeSdzj7NbtwpYdS37M52UweBOuH5MqtAxY1IGg6FI\\nYyopg8GdMJWUwWAwOBdTSRkMboQ7TmJpOik3Y+KseezeZ2nLB/bJo1nfzLwllmY9qj097/svKz/5\\nnP/7Yl32NvvUz+z68iPn5V38LnHqN2w2iH7iQRrWrp6Td1ss81d+TOmSJekceTM9OncEYOqS99i5\\n/xcuZlyk17130KllU6flheKnsi/uFKtOSkTuU0p94Oocl8N+7nRXvP/22D1aAb5gptasT5zGigUz\\nAUsB/socVi16FT/fcvQaGkNUZCu63Hk7Xe68Pfv1X3z9jfPy7lUcPnGS5ZNjOHD0BDFz3mD55Jjs\\nvOMWLuWDaaPw8/Gm99hX6BgRzuHfT/LLkeMsnxxDcuoZ7hsy2qmdVJFX2ZsxKdchIqFo47DhMmzd\\nGUtUZCsgS7OexpmzZwFITknRmnV/Pzw8PLRmfUdsrte/+uZS+jzaw3l59+yn481NdN4bK5N69ixn\\n0s/pvKln8PHyJMC3HB4eHrRoVJ8te/bTrL4wY1hfAMp5eZJ+/gIXLzrvD/NyKnsgl8o+u413xrJl\\nR2y2yj44KNCpKnt3oDhVUnOBCBHJAJYC1dHTPCymAFpzIAV4Dyhj/fQDfrD2VQ3YDNyvlKoiIvXR\\n895kog0wjwJ+aOHnAaAxEKuUelJEGgJvAUnWOgBEpB+XattHAzWs7O0tOWmhEZ+YRJjYa9b9iE/M\\n0qz7aQV4lmZ9124iwhtnb/vDj4qKweVzyUQdTcLpFMJqhubkLedDfHIK3p5lCfD14ey58xw6cZKQ\\n4EC2//AjzRvUpUQJDzxLlAHgg3Ubadu0ESVKOO+7tsir7M2YlEuZCvQH9gJ1lVKRIhJMwbXmR4Bj\\nSqknrG3rAP8BbrBU6XcCg6zXzgaeUkr9IiJ90R3aMqApWqd+CjgmIn7ACHTn+H8iMg9ARKqj71/K\\nq20HKK2UinREA+XlEgV4zDBiJk7D2zu3Zh3g/Y8/557bOzkj1mXJm3fiM0/y4pzFeHuWJaRC+Vx/\\ngOu2xfLBV9/y+qghr
oiaTXFT2RdHis3hXh62W7+vRWu+BWgpIvOBWkqpL4B6WKp04DNyLMMRwEKr\\nInsIqGA9/6tS6g9rwq4T1n7ro6swyJkbJ4J8tO15shc6wUGBxCfaa9YTKR9kp1kPb8SyV6ezYMpY\\nvL29CKlUIXvd9tg9hDes76ho+ecN8CPBTul+Kvk0wQE5BvqIBsLSCS8w/8VB+HiWpXJwEADfxe5l\\nwcpPWDBiMD5enpfs16GZi7rK3g0NxsW1k/rT+l1grblS6nf0YdoqoI+IjETP15HV6pnWD0A60MFS\\nrrdUSj2Tzz6xXm+/j6z2/JPLa9v/xEG0jmjK2g26b9aa9Tza8iHRJCYnk37uHBs2baVVMz0edDIh\\nEc+yNzjdqtv6pgas2bJD5z1wmGB/P7zKls1e33vMdBJPp5J+/gLrv4+jVeP6pJ1NZ+qS95gXMxA/\\nH2+n5oXip7J3B4rT4V4Gl+YNooBacxGJAkoppT4Xkf3Aq+ixpC7WJp3s9h+HPhT8XES6oWcUPED+\\nKKAZsAY90yDomQkn56NtdyhNGoYRJnXo9vQgrQB/tj+rPlurFeDt2tD17s48MfgFrS1/qFu2Zj0+\\nIZFAfz9Hx7uE8Lq1CKtRje7Dx+NhszGid09Wf/0d3p5lubVFU7rc2o4nX5qm897XGf9yPry3dgPJ\\nqWkMfnle9n4mDXySyuUDr/BOhUeRV9m7eBYER1BsDMYiUh79x/8BcMDSnIdSQK058CZ6kPxvdIc3\\nCvgeXVmVQx+q9VZKBYtIPeA1a7tz6IqtHLBSKdXMyrMD3cEFAG8Ax4HfAG9L396XS7Xto7EU7Vf7\\nvKFVq2Qe3LnpapsVCao3bQ0XL/LblytdHaXA1LjtAQCKiyU6C1uF6lccyLq4YbnD/6BLtO9mNOvO\\nQkQC0Id1H4hICLBOKVXX1bnAdFKOxm07qfXvOL6T6vCg0aw7kTTgfhEZhh5PGuziPAaDIQ/XdSel\\nlPoLfUmBweAeuOGRUXE9u2cwGK4TrutKymBwO8y9ewaDweBcTCVlMLgTZkzKYDAYnIuppAwGdyLD\\n/cakTCdVZMm6LbA4YIMSJfGoWP3qmxYVsgaYzyS7Nse1UqEYtXEhYTopg8GdMGNSBoPB4FxMJWUw\\nuBPmOimDwWBwLqaSMhjcCTcckzKdlMHgTrjhJQjmcM9gMBRpTCVlMLgT5nDPUNTRmvUfLc1633w0\\n68vsNOv/szTrX2VvozXrHzst74QZc4nbtx8bNqIH96dR/ZyJUb/a+B3z3lxK6VKluCPqFnp2vYez\\n6ed4fsxEUtLS+OvPv+j3xMNEtohwWl6Aia+9xe6ffsVmg5inHqFhnZrZ69Zt2cG85at1G7drSc+7\\nbuPc+Qu8MH0eCadT+PPPv+jT/V46WFJUw9Upkp2UiFQEXlJKPZXn+ZeBvUqpN+2e87aeCxWRQ0AD\\npdQZJ2TsopQqUvPlbo+N0wrwBbM4cOiwpVmfBeSnWY8mKrJ1Hs16nHM167t2c/joMVYsnKvzjp/C\\nioVzc/JOm8XqN1/TeZ99nqh2bfjqm++oXvVGhvTtxcn4BB7p/yxfrHjLeZl/2M+h43+wYvoYDhw5\\nTvSMBayYPiYn87w3WDV7In4+3vQaOZmols3Yte9nGtSuwZNd7+b4yXgej5nguE7KXILgHCy33VNX\\n39KlONz+cq3k1qxXy6MAT8HH2yuPZn1XrtdrzXpPp+XdsmMXUe3aZOdNSbXTwp9OoZydFr5FsyZs\\n/n4n/n6+nE7VzrrUtLRsG4uz2Lp7H1Etm+nMVUNIPXOWM+npOnNqGj5eXtlq+JaNw9gcu5fO7Vry\\nZNe7AfgjIZGKQc6zRLsDLqmkRORRoB3a4hIGxADd0aLNHsBJLDOLiPQEnkdLPs8Be0WkHNoacwPw\\nXT77rwwsQiuuLgJPKqWO5NlmPNrXVwKYo5R6V0Qao3
Xuf6FNMV3R86AvBSqh9eyjgIZAYxFZpZS6\\n9zL7ehPt2AsEPkbbjMsDAkxVSi36N22YH/GJyYRJnezlAD/ffDTrxwipVJFtu+KICG+Uva1LNOtJ\\nSYTVtcvr70d8YpLO6+/H2fT0nLw7dxPR5CZ6P9SdVZ9+wa1depCadoYF0yY6LS9AfPJpwmrl3D8X\\n4OtDfFIK3p6eBPiW4+y5cxw6/jshFcqzbc9+IhrlCFe7DRnJyYQk5o8e5riAbqi0cmUlVRu4G5gI\\nvICWe05Ed1YAiIgNmAB0tLatZa3qiT7EiwR257PvscA0pVRHtPNuhP1KEYkEqiml2qL1Vy+KSFkg\\nGBiglOqANhv3QHdIQda2twEBSqmpQIrVQV1uXwBJSqn7rMcN0br3/wEDrrm1/gGX06z3jx5tKcBz\\nttWa9ducEeuyXJJ3xHCix0+h//MjqFK5ImRm8n9ffEnlisF8uXIZS+ZMY8y0mS5MnHuc2mazMWlI\\nH2JmLKD/2OlUqVg+12daPm0Mr44cyrCpc7meLU3Xiis7qR1KqUzgd2CPUuoiuoKyr98DgTSl1ClL\\nmpDleMpPbW5PK2C0pUl/gdz69az1Laz1a9DtUMl6/wki8g26swwEfgJ8RORtdCe0vID7gtxK9S3W\\nZ8zSvhc6V9esN2bZq6+wYMq4fDTrcc7XrAcFkZA3b2DOP1VEk5t4Z/4sFkybiI+XNyGVKrJrz17a\\n3NwcgLq1a3EqIZGLFy86L3OAP/HJp3MyJyVTPiBHrBrRsD7Lpo5mwUvP4e3pSUiF8uz95Td+j08E\\noF7NUC5ezCApJdUxAY1mvVD5+zKPbXke27eKRz7P5/cZ/gS6WorzSKXUvfmsX2SnQa+nlPoNmAnM\\nVEq1AxYAKKXSgRbWcmfg9QLuK2vd1T5jofHPNesJeJYt63zNekQz1qzfaOX9Wef1ysn75ODnSUzS\\neddv2kzL5k2pViWEuH0/AnD89z/wKluWEiVKOC9zk0as/W6bzvzrQYID/PH2zFHD9xoxicTTKaSf\\nP8+G7btodVMDduz9iTdWfQJAQvJp0s+fx7+cj9MyF3eK5Nk9OxIBXxHxA84CrYEt5KjNPyBHbW7P\\nNvRh1TwRuQWoqJR6J8/6l0VkMnrcaqpSagB6jOyAiJRBd0hbRaQJUF8ptVREtgFZNkmPq+zL6WjN\\nem26PT3Q0qwPYNVna7QCvF0but59O08MHp6PZj3JJZr1Jo0aEFa3Dt169cfmYWPU0IGs+vQLnbd9\\nJPf/9w4eHzQMGzZ6P/wgAX6+PPC/u4geP5mefQby98WLjH7OuarEJvXrEFa7Bt2GjMTD5sHIvo+x\\n6stvtMq+VXO6/ucWnnhxIjag9/3/xd+3HN06RxEzYwE9ho3m/IU/GdH3Mcdp1t3wMLJId1JKqQxL\\nTQ5QHbEAACAASURBVP4NcAjYa616C1gtIuvQA+d5/2VGA2+ISHdr3aN59rtZRNajOzwb8Kq1ajbw\\nIXDAejwHfQjXU0SeQg/CT7W2jRWR7UqpiMvsyyUM6fNkruW6tXOu4enULpJO7SIveU2DunVYOG2C\\nw7Plx9C+vXMt161dK/txp/Zt6dS+ba71Xp5lmTl+tDOiXZYhj3XPtVy3RrXsx51aR9Cpde7rtm4o\\nU5ppz7vke8stuK4160WZ0Ko3Fi/NOnBo9zYXJyk4oY30ZQQHv/nExUmuDVvNJlfWrK+a6XjN+r0D\\nnTplbJG8TspgMBiyKNKHewaD4Rox10kZDAaDczGVlMHgTph79wwGg8G5mErKYHAn3PBsvamkDAZD\\nkcZUUgaDO+GGZ/dMJ1WEsdmKi2YdyMwk889zrk5RcLLa9gYv1+YwXBXTSRkM7oQ5u2cwGAzOxVRS\\nBoM7Yc7uGQwGg3MxlZTB4E6YMSmDwWBwLqaSMhjcCTe8TspUUgaDoUhjKik3Y8LMV4m
zNOvRg/rS\\nqJ6dtvzbTcx7cxmlS5fijo4d6Nnlf7z/8ed8tObL7G32/vQzsV85b7bKiXNeY/e+n7QWfsBTNKyX\\n4+Fb990W5r21nNKlS9H5lnb0vPcuAD7+cj2vv7uSEiVK8MzjPWnf0sma9bmvs/tHhQ0bMf170bBu\\n7ZzMm7Yyb+l7WrPeIZKe99zJtt0/MOilydQKrQpAnerVGPGMg9y3bjgmZTqpAiIi9ymlPnB1jiux\\nPTaOw8eOs+K12VpbPuFlVrw2G7AU4NPnsHrxPK0tH/ICUW1b0/Wu2+l6V45m/XNnatZ3/6C18POm\\nc+DQEaInz2DFvOk5eWfMY9Xrs/Er50Ov50YS1aYlZcqUZs6by/hg4SzSz51n9uKlTu2ktsft5dDx\\nE6yYM5UDh48SPXUWK+ZMzck86zVWLXhFZx7+ElFtWgDQvHEDZo0uctLrYoE53CsAIhKKnbS0qLJl\\nRyxRkXq+8Zqh1UjJo1kvZ6dZ19ry3Jr1uW8spa8TNetbd+4mqk1LK29VUs+c4cxZS1mekqq18H6+\\nWlnepDGbd8ayZWcsrZqG4+3pSXBgAGOHPeO0vABbd8UR1Vp3PDWr3Wip7C+XuRGbd8Y5NR+ZmY7/\\ncTKmkioYc4EIEclAK9erA1HAYqAK4AWMVkp9YklCv0SLRIOAu4AU4D20pr0M0E8ptSvvm/xbtLY8\\n59Djypr13USEN87eds+PP1HJyZr1+KRkwiTHDhPg60t8UhLeXp4E+PlqZfmx44RUrMC22D3ZWvhz\\nFy7Q54WXSE07Q//HetCy6U3OzVwnx8AT4OdLfFJyTub0cxw6doKQisFs2/0DEY0bElIxmAOHj9In\\nZhwpaWn0e7gbrZuFOy1zccd0UgVjKtAfrdSqq5SKFJFgYK1SaomI1ADeB7IGc1KVUh1FZBJarX4E\\nOKaUesLatk4+71HoZNqZvmw2G5NefI7oCS/j4+1FlUoVc30rrvz4c+7p3MkZsS7LJXlfGELMpBl4\\ne3taWni9/nRKKnPGjeDEyVM8Mmg4X7/3pstuxr5EDT98EDFTZ+Ht5UmVihXIzMwkNKQy/R7uxu3t\\n23D0xB88MiSGNW8vcIyMNcP9xqTM4d61k6VOTwaai8gmYAm5Ve5ZAtEspfoWoKWIzAdqKaW+cESw\\n4KBAEhKTs5e1tjy3Zv2deTNYMHU8Pl5ehFSqmL1u2644whuGOSLWFfIGEJ9knzcpd96bGrJszlQW\\nTHoJb29PQipWINDfj/AG9ShZsgRVQyrh5VmWpNMpzsscGEB8kp1mPTGJ8oH+OZkbN2DZzEksmDAS\\nby9PQioGU6F8IJ07RGKz2agaUokgf39OJSQ6LXNxx3RS106WOv1BIACIBO7Js00upbpS6negMbAK\\n6CMiIx0RLLe2/JdLteVDXsjWrK/ftJWWWZr1+AS8PF2gWW/ehLUbvtN5f/6V4KCA3Fr4YSNITD5N\\n+rnzbNi8nVZNb6JN8yZs2xVHRkYGySmppJ87h79vOedlbhbO2o2brMwHCA7Mk3n46JzMW76nVdOb\\n+PirDSxasRrQh4uJyacJDgrMd///GjMmdd2SwaVtFQQctCzL96IV6/kiIlFAKaXU5yKyHwdZjps0\\nDCOsbm26PfWM1pY/+wyrPl2Dj7fWrN9/V2ceHzQcmw16P9SdgCzNemISAa7QrDeor7XwfYfg4WFj\\n5KC+rPr8S61Zb9uKrnf9hyeGvqiV5T3uz9bCd2rfhgf6PAvAiwP7OE5Znm/meoTVrkW3/s/pzAOf\\nZtUX67RmPbIlXTt34onnRuk2frAL/r7l6NAqgqHjpvH15m389dffjBrUx+lfCMUZYzAuACJSHtgJ\\nfAAcUErNsc74fQTEowfQB6LHpG4B+iul9opIf3Rn9iZ6wP1vdIc3Sin1bd73sSe06o2Zh3ZtdswH\\nKmRCm7SCzEwOf
u+8yxf+LdWba337wa1fuzjJtWELkSsbjBeNdLzB+IkxTh0ANJVUAVBKxQNV8zx3\\nCGhk99Qy6/cYu23m2K1v46h8BoM7Yzopg8GdcMMjIzNwbjAYijSmkjIY3AlznZTBYDA4F1NJGQzu\\nhBmTMhgMBudiKimDwZ1ww/mkTCVlMBiKNKaSKsoUJ836xb/IPKpcnaLg2PT3s+0GbxcHKWTMHOcG\\ng8HgXEwlZTC4E244JmU6KYPBnTCXIBgMBoNzMZWUweBOmErKYDAYnIuppAwGd8LcYGwwGAzOxVRS\\nbsaEma8St3e/pVnvR6P6dpr1jXaa9agszfpnfPTFV9nb7P1JEbvuU1dEZ+LS1cT9elhn73kPDWvm\\nTIa6bucPzP/wS0qXKknnFuH06BTpkowAE2bOJW7vj9hsED2ofz5tvNSuje+x2theZa+IXfeZY8K5\\n4ZjUdd9JicijQAOl1NACbj8aSMgzNTAi8n9Kqf8WfsKCsz02jsNHj7Fi4RytWR8/lRULdUytWZ/N\\n6jfma836s1ma9c50vatz9us/X7fBNdl//JXDfySwfPQgDhw/SczCd1k+elB29nFLVvHBuCH4eXvS\\ne+prdGzakIqBzpdH6DY+foU2nsXqNxZYbTycqLZtikwbF1fM4V4h4eoOCmDLjl1Etb2MZv10CuW8\\nve006+Fs/n5nrtfPXfw2fR97yOm5Abbu+4WOTRsCUDOkAqlnz3Em/TwAyWln8fEsS0A5b509rDZb\\n9v3skpyXtnHaFdq4ST5t/JZj29gordwXEemHdullAB8qpaaJSDhaP3XB+nnA2ry5iKwFKgNDlVJf\\niEiCUirI0leNRfv5koH7gVZoA3IGUA9YqZR6qbA/Q0JiMmGSI0cO8PclPjFJa9b9/Tibnn55zfr+\\nn6hUwbmadXsSUlIJq14leznAx4v4lFS8PW8goJw3Z8+f59Af8YQEBbB9/680r1frCntzYM7EpDxt\\n7HeNbRzssjYurphOSlMdaEqO0WWTiLwPPAa8qpR6W0RuAbKUv8FKqU4i0gBtL7Y3EvsDDyqlDorI\\nW8BtQBoQAdRFV6+HgELvpPJi/6Vns9mYNOJ5oidMxcfLiyqVKmFnNWflx59xT+fbHB2pwNh/X9ts\\nNiY+9SAvvvYu3p5lCSkfWGTGXi7RrI8YbtfGeVX2TmjjItIuhYnppDRNgFLAemvZBwgF/g+YJyJ1\\ngBVKqZ9EBGADgOXWuzHPvuKB10WkJFAD+BrdSe1SSqUDWPsodIKDAklISspe1pr1HFOu1qzPBGDa\\nvNcJqVQhe9222DhefHaAQ3IVhGA/XxJOp2Uvn0pOIdgvx0wcUa8WS0c+A8D0FZ9QubxrqpFra+OF\\nuVX2sbtd2sbOQkReAVqgv2sGKqW+t1vXD+gJXAR2KKUGXW1/ZkxKkwF8qpRqb/00VEptVEqtA5oD\\nPwFLRKSDtb3911Xer67FaDloO3Qnl8XfOJjWN9tr1n++VLP+7HASkyzN+ndbaNm8KWBp1ss6X7Nu\\nT+uGwprv4wDYd/Aowf6+eJW9IXt97ykLSExJI/38BdbH7qNVgzqX25Vjcxb1Ns7IcPzPFRCRdkBt\\npVRL4Alglt26csAwIFIp1QaoLyItrvaRTCWl+QboICKewDlgBjAc3cifKqWWiYgNCLe2bwNMEZFG\\nwOE8+/IFjoiIH9AB2OOMDwCWZl3q0K33AGweHowa8gyrPv0CH29vrVm/+w4eH/w8Nmz0ftj1mnV7\\nwutUJyy0Ct1fmomHzcaIR+5j9cbteJe9gVubN6JLh5Y8OXk+NpuN3ndF4e/jmnmgmjRsoNXwvftb\\nbTzQamMvbm0Xyf13d+bxwc9Zbfxgnjb2d0lmJ9MR+BBAKfWjiPiLSDmlVCp6nPZPwFtEzgCeQNLl\\nd6UxnZQmCd0xbUSXoR8qpc6JyK/A+yKSgh44fw
zoA5wSkY/Qh3MD8+xrLrAJ+BmYAowGop3xIQCG\\n9u2Va7lu7ZrZjzu1j6RT+0uvL2pQtw6vT5/k8GxXY0i3u3It160Wkv24U/NGdGreKO9LXMLQvr1z\\nLedu47Z0at/2ktc4rY1dPyZVEbA/pRlvPZeqlDovIi8Bv6GLgeVKqauepr3uOyml1Jt2i6/mWfcF\\nuQfFQXc6+e0nyPo9Ehhpt2qJ9fvdvNsaDNcB2dPLWod70UAdIBX4WkQaK6XirrSD676TMhjcCtdX\\nUifIOQsO+jKd363H9YDflFIJACLyLfqs+hU7KTNwbjAYCpO1QBcAEWkCnFBKZZ22PQTUE5Gy1nIz\\n4Jer7dBUUgaDO+HiWRCUUptFZKeIbEafNe9n3XqWopRaLSJTgfUi8jewWSn17dX2aTopg8FQqCil\\nhud5Ks5u3QJgwbXsz3RSBoM74foxqULHjEkZDIYijamkDAZ3wlRSBoPB4FxMJWUoHEqVwUOauzpF\\nwbmob6XMOHnItTmuEY/AkCtvYCopg8FgcC6mkjIY3AljizEYDAbnYiopg8GdMGNSBoPB4FxMJWUw\\nuBOmkjIYDAbnYiopg8GdcMNKynRSbkZx06xPmD6LuL37sGEjeshAGoXVy8n7zbfMW7RE5+0URc/7\\n7yMjI4NRE6fyy4GDlCpVktEvDKNmaDWn5QWYuPhd4tRvWrP+xIM0rF09e926bbHMX/kxpUuWpHPk\\nzfTo3BGAqUveY+f+X7iYcZFe995Bp5ZNnZq5OGM6KTtE5D6l1AcF3HYGMFMpdfAy6w+h9e1n/ul7\\nXCvFTbO+fWeszrt4AQcOHiJ67ERWLF6Qk3fKK6xeugg/X196DRxKVLtIftj/I2lnzrJ88XyOHDvO\\n+GkzWfDKFOdl3qs4fOIkyyfHcODoCWLmvMHyyTHZmcctXMoH00bh5+NN77Gv0DEinMO/n+SXI8dZ\\nPjmG5NQz3DdktMM6qUxznZT7IiKhQPeCbq+UGnS5Dqqw3uNaKW6a9S3f7ySqnRZD1KweSkpqGmfO\\n2OX18SbA31/nbd6Uzdt3cOjosexqq2qVEE78/gcXL150Wuate/bT8eYmOvONlUk9e5Yz6ed05tQz\\n+Hh5EuBbTmduVJ8te/bTrL4wY1hfAMp5eZJ+/gIXL7pfZ+IorttKSkSqAkvRdpiSaC9eAxEZie68\\na6DNxlFol14VwAsYrZT6REQ2oNXpp4H30aqejWinWHvrbfqLSGdr/7ehTTIRIjJSKTWmsD9TcdOs\\nJyQmElYvR5SqleWJeHvb5T1ylJDKldi2YxcRTcORWjVZ8u57PNL9fg4fPc7R4ydIPp1CkJNyJ5xO\\nIaxmaE7mcj7EJ6fg7VmWAF8fzp47z6ETJwkJDmT7Dz/SvEFdSpTwwLNEGQA+WLeRtk0bUaKEg+oD\\nNxyTup4rqS7Al0qpDmgt1RrgG7vOo7RSKhLt0VtryT7v51I9+mDgPWt9mTzr9iql2qLdfB2BqXne\\nw6FcTrPef/jIIqlZv0RZPiqG6LET6T8smiqVK0FmJu1at6RhWD169O7PkuXvUaN6tVyvc3Xmic88\\nyYtzFjNg0hxCKpTP9Y+wblssH3z1LSN69XBF1GLLdVtJoSeMX21JPFcCW9ETw2ex3fqdDDQXkd7o\\nOZsDyU09YIX1+CMgwm7dd9bv4+jO7nShpc+H4qZZDy4fREJiYvbyqfgEygfl2L4imobzzkJtGZs2\\nZz4hlSsBMLhPjvcu6n/3ExjgPOlmcIAfCadTspdPJZ8mOMA3ezmigbB0wgsATH97JZWD9ef5LnYv\\nC1Z+wmsjB+NjZzwudEwl5T4opfYCjYFvgYlA1Tyb/Gn9fhAIACKBe/LZlQ3decGlyvW/82znUIq8\\nAvySvBGssQbq9/2kCC4flDvvM0Ny8n67iZYRzfjp5194YcwEADZu3kr9unXw8HD
ef+PWNzVgzZYd\\nOvOBwwT7++FVtmz2+t5jppN4OlXr4L+Po1Xj+qSdTWfqkveYFzMQPxeZl4sz120lJSLd0A6wD0Uk\\nARiDFhbmJQg4qJTKEJF7gdJ51h9AV2A7gNuv8rYZOLDNi5tmvUnjhoTVFbo9/jQ2DxujnnuWVR9/\\nppXlHdpx///u5vH+g7Va/dGHCPDzw69cOTIzMunySC/KlCnNy2NGXv2NCpHwurUIq1GN7sPHax18\\n756s/vo7vD3LcmuLpnS5tR1PvjRNZ76vM/7lfHhv7QaSU9MY/PK87P1MGvgklcvnLcoLATespGyu\\nPJ53JZYTbD5wBj14PgZYBnwApAAJSqk51hm5j9C66MXo8atPgFvQA+cXgPeARGAb0EIp1dH+EgQR\\neRnYC3yKVlB/oJQafKV8oVVvzDwUu6UwP7LDCA1vCTYbh37YefWNiwih9bSy/bcvV7o4ybXhUb/1\\nFSvyv2N6OPwPuuT4ZQ4/Ksj1fs58s6KEUmoXuceP4NJDPpRSh4BGdk8ts36PARCRMKC/UmqTiHQH\\nyluvC7Xbx9ArvYfBUGi44XVS120nVYikAQtEJBN9OPeYi/MYDG6F6aT+JUqpI0AbV+cwGAC3HJO6\\naiclIt7Ao0B99NmrPcBbSqlzjo1mMBgMBauklgNJwCb0afRI9Fms/zkwl8Fg+Cdcj5UU4K+UutNu\\neb6IfOuoQAaDwWBPQa6COygiFbMWRKQC8IvjIhkMhn9MZqbjf5xMQSqpasABEdmH7tTqAvtFZCOA\\ndW+awWAwOISCdFIvOjyFwWAoHK7T66RK5PekUurrQs5iMBj+LdfpwPkIu8elgTD0mT7TSTmaks69\\n4fdf45Hv91kRxfpjvviXa2MYrspVOylrvqVsRCQYPWuAwWAoarhhJXXNc1wopU6h51AyGAwGh1OQ\\nK87fJvc8STeiZw0wGAxFDTespAoyJvWV3eNM9JxLax0Tx2AwGHJz1cM9pdQS4Bv03f5pwC6lVLqj\\ngxkMhn9ARobjf5zMVTspEXkaWA90A3oAG0TkEUcHMxgMBijY4d5DQD2l1HkAEfFCHwIucWQwg8Hw\\nD3DDMamCnN37O6uDAlBKnSVHUmAwGAwOpSCV1FERmQ18aS3fBhxxXCTDv2HC9NnE7d2PzQbRQ56h\\nUf2cq0W++uZb5i1+m9KlS3HHrbfQ8/77OJuezvOjx5OSdoa//vyTfk8+RmTLvLMqOzDvtBnE/bBP\\n5x06mEZh9XPybtjIvEVvUrpUKe64LYqeD3QlIyODUROm8MuBA5QqVYrRLzxHzeqhTssLMPGN94j7\\n5Tds2Ih+/AEa1sp5/3XbdzP/g88oXaoknVs3p8ft+jLDn48cp//kV3nkzqjs5xzCdVpJ9UZ74x5D\\nT3532HrOLRGRQ9ZEf/bP/UdE+rgqU0HZvms3h48eY8XieYx/8XnGvzwre11GRgZjp85g4YwpLFsw\\nm/XfbuaPk6dY/cnnVK9WlbfnzWTmpLGMnz7rCu9QyHl37uLwkaOseHMh40fGMH7qK7nzTpnGwlnT\\nWPb6PNZv3MQfJ0+xbsNG0s6cYfkbCxk/IpopM2Y7LS/A9n0/c/j3UyyfMJxxfR9m/OLluTKPW7Sc\\nBdEDeHvMUNbv2MMficmkn7/A+EXLadGwrlOzugsFqaQeUEpNcniSIoxS6gtXZygIW77fSVS7SABq\\nVg8lJS2NM2fO4u3tRfLpFMr5eGerq1o0b8rm73fi7+eH+vU3AFJT0/D3873c7gs/7/YdRLVvl5M3\\nNdUu72nKeXsT4K/Fny0imrF52/ckJidnV1tVb6zCid//4OLFi5Qo4Zxbcrb+8BMdI27SmatUIvVM\\nOmfSz+HtWZbktDP4eGndOkCLhnXZsudH7mp7MwuiB/D6h2scH9ANK6mCdFL3isgqpVTK1Td1PSLy\\nKHrO8fKAo
NXmB4AJwF/AMeBxoHve7ZRSi6zdRItIJFrueQ96FtIGwBz0CYMDaLForFLqSRFpDMy1\\n9p8BdAXKAUvRyqy5wP1KqYesjAuBj5VSHxXmZ09ITCKsbp3s5QA/P+ITk/D29iLA34+z6ec4dOQo\\nIZUrsW1nLBFNbqL3Iz1Y9cnn3Hpvd1JT01jwyuTCjHT1vPVyqosAf3/iExOtvP6cTU/XeStVYtuO\\nnUQ0bYLUrsWSZct55MEHOHz0GEePnyD5dApBgQHOyXw6hbAaOcKfgHI+xJ9OxduzLAHlfDh77gKH\\nfj9JSPkgtu9VNA+rQ8kSJSjppE7UHSlIJ1UWOCQiCrsB8yI+j1RDoBVQGz398Q3ArUqpoyIyB20l\\nzsxnu6xOao9SKtry5T2Evj4si6bAA8Ap4JilaQ8GBiilYkVkDPpSjY+BcLTC6jQwTURuQLdha6Cf\\noz58Fpl2NwrYbDYmjYomeuxkfLy9qFK5EpDJ/32+lsoVK7Bo1sv89POvRI+bzKq3Fjo6Wv55M/Pk\\nfWkE0S+Nt/JWhsxM2rVuya64PfTo1QepVYsa1UNxpTsybxtP7P8oL859C2/PsoRUCLrCKx1ExvVZ\\nSY11eIrCZ4tS6qKIHAN8gfNKqaPWuvVAO2BXPtthtw3AdqAt2k6cxa9KqT8AROSE9bqTwGQR8QQq\\nk+PmO6CUSrS2/QToDPwOfKuUKvQzpMFBgSQkJmUvn4pPoHxQjiU3oslNvLNwDgDT5i4gpFIltu/a\\nTZsWzQGoW6cWpxISnHb4FFw+iITExJy8CXnyNm3CO4vm67yzXyWkciUABvd9KnubqLu7EBjg7/Cs\\nWQT7+5FwOkd0fSophWD/nP86EWF1WDpuGADTl612jKX4OqMgA+cl8vnJFJHKjgz2L/nb7nEAWiCR\\nRWn0IVne7ey3ybzM47yvyXrdTGCmUqodsMBunX1H9Bb6MPBu4J0rhf+ntG7RnDVffwPAvp8UweWD\\n8PbyzF7/5MBhJCYlk37uHOu/3UzLiKZUuzGEuL0/AnD89z/wKlvWaeM7rVtEsGad/j7Y96MiOCgI\\nby+vnLwDBpOYlGTl3UTLiOb89PMvvPDSOAA2bt5C/bp18PC45vvk/3nmxvVZs3WXzvzbEYIDfPEq\\ne0P2+t7jZpGYkkr6+Qus37GHVo2cey9+Zmamw3+cTUEqqRj04cnP6BuLBa0Kry4iE5VScx2YrzBI\\nRneqVS1HXjvgO6782SPRuvUWwI8FeI8g9BTLZdDV0ta8GyildotICPrQMPraPkLBaNKoIWF169Dt\\niT7YPDwYNWwwqz75HB8vL27t0Jb7/3cnjw8Ygs1mo/ejPQjw8+OBe+4meuxkej41gL8vXmT08CGO\\niJZ/3saNCKtbl26P9cJm82DU8KGs+uhTfLy9uPWW9tx/z395vN8gbDbo/ejDBPj74edbjsyMTLo8\\n/DhlSpfh5XGjnZYXILxuTcJqVKV79GQ8PGyMeLI7q9dvxtuzLLfeHE6XqDY8OXYmNmz0vuc/+Jfz\\nZt+Bw0xespLj8YmULFGCNVt2MWvY0/j5eF39DQ0F6qSOAM8opfYBiEh9YABwK/qevqLeSQH0At4R\\nkb/Rg97LgZ5X2D7M7pKD0cC9V9n/bOBDa9+z0QPsK/LZbi3go5Ry2NfR0P5P51quW6dW9uNOHdrR\\nqUO7XOu9PD2ZOfElR8W5KkOf6ZtruW6d2tmPO93Snk63tM+13sPDg0kvjcCVDOmZ+79D3dAbsx93\\natGETi2a5FofVrMab41xUud/nY5J1crqoACUUvtFpL5S6ryIFLkpW5RSb9o9PgOEWot5LcP5bqeU\\nCuVS3rR73MzudVmPX7N+slidd1sRsQHtgdy9iMFguCIF6aTSrbNcG9BjOa2A0iJyG/r0uuEqiEgo\\n+vDxPaXUry6OY3BnrtPrpLoDg4Gn0APtPw
FdAC/06XnDVVBKHUJfumAwGK6RgsxxniQi04A66Erq\\nZ6VU6lVeZjAYXECmG45JFWQ+qWeBX9Gn2ecCv4qIwy9ENBgMBijY4d6jQM2s22JExJ/ic1bPYLi+\\ncMMxqYJcBXfc/r49pVQy+lS7wWAwOJzLVlIi8rj18IiIfISejTMDuAU9dYvBYChquOGY1NWuus4i\\nEX2zLEAK+syewWAwOJzLdlJKqcecGcRQzMnIIDM1wdUpCk4JrbD3qFzrKhsWL1w5I4SjKIgc9CiX\\n3mSLUqpqPpsbDAZDoVKQs3v2t5OUBjoCnpfZ1mAwuJLrbEwKAKXU4TxP/SIia4DpjolkMBgMORTk\\ncO+WPE9VBWo6Jo7BYPg3XJdjUsAI9JiUDX0JQjr6Pj6DwWBwOAW5mPMt9B38twJl0AICcWQog8Hw\\nD8nMdPyPkymod28h2pjyA3repfsdmMlgMBiyKUgndc6SBnQG3ldKZZDPJQkGg6EIkJHp+B8nU5Ax\\nKURkLnqe814i0hKtiDIUQYqbZn3i3IXs3q+w2WzE9O9FQztv4LrvtjJv6QpKlypF51va0vOeO9m2\\n+wcGjZ5ErVB9mV6dGqGMeMa5Q6QTZr5qtbGN6EH9aFQ/xx341cZNzHtzmW7jqA707PI/3v/4Mz76\\n4qvsbfb+pIhd96lTMxdnCtJJ9UB75mZZ+qdQnDAFroi0B/orpboUYNsuSqmVjs5UGFzL57pW7DXr\\nBw4eInrsZFYsngfkaNZXv70IP99y9Bo4jKh2kXz1zbdUr1aVIf2e4mR8Ao/0HcQX7y8t7Gj54tVk\\nvgAAIABJREFU5939A4eOnWDF3Jc5cPgo0VNmsmLuyzl5Zy1g1Wsz8CvnQ6/nRxPVpgUAzRs3YNZL\\nLzgl4yWZY+N0Gy+cw4FDh4keP5UVliYsIyODsdNns/qN+bqNn32BqLat6XpXZ7re1Tn79Z+v2+Cw\\nfO54du+qh3tKqd+VUjOUUspaflcpFef4aNfEcFcHKApcTrMO5NKse3h45NKsn07Rcxg6W7O+dVdc\\ndsdTs9qNpKad4czZdJ03JRUfby8C/Hzx8PCgZZPGbN6522nZLseWHbuIatsagJqh1UhJO8OZs3Zt\\n7G3Xxs3C2fz9zlyvn7v4bfo+Zia0vRYKdLjnQrxFZCn6jOL7wEq0iSUTbRV+FG2CaWyp4O8VkfHo\\nm6NLAHOUUu+KyJtoB14g0A0tTaiBPls5Uim1VkSeR1thMtAK9AmWaj1Lz37Ueq8HgJuVUv1FpCf6\\nivzl2FVHIpKglAoSkSi0XPVPtFrLoScciptmPT7pNGF2NpsAP1/ik5Lx9vIkwM9X5z12gpCKwWzb\\nvYeImxoSUrECBw4fpU/MWFJS0+j3SHdaNwu/wrsULgmJyYSJXRv7++o29spq43QOHT1GSKWKbNu1\\nm4jwxtnb7tn/E5UqlKe8I5Xw1+MV5y6mPlAXXfEdRDvznlJK/SIifYF+SqnxIvK81UFFAtWUUm0t\\nB94uEfnQ2leSUqq3iDyMNhq3swSnG9BTIw8FKqHdglmHs7OAjtYUylOArkqpt0XkYRFpAgwCooCb\\nLpPfH3hQKXVQRN4CbiO3st2hFHvN+vBBxEyZibeXJ1UqViAzM5PQkMr0e7g7t3dow9ETf/DIs9Gs\\nWfoapUuVclHmnMc2m41JI54nesJUfLy8qFKpUq5TTCs//ox7Ot/m/JDFnKLeSe1SSqVDthIqAlgo\\nIqCroO/zbN8KaCEiG6xlD3THA1qZDloztQFAKXVCRC6ISAC6SvsKbRdeJiIVgNrAKuv9vICs2/yf\\nATYCzyqlTlvr8yMeeF1ESqIrt69xYCdV7DTrQQHEJyXn5E1MonxgjjI94qaGLJulK7tpC5cQUrEC\\nFcoH0vkWfUhbNaQSQQH+nEpIpEqlig7PqzMHkpBk18YJiZQPtGvj8Ma8M2+
mzjzvdUIqVchety02\\njhefHeDYgNfjmJSLyas0Twc6KKXaK6VaKqWeybP+T2CRtb69UqqeUuo3u3WQc/V8FqWBDKVUH3QF\\nVZEcfddxu301V0pNsV4TiO5sqtjt056sr/XF6MPAdsD/XcPn/kcUO816s3DWbtys8/78K8GBAXh7\\n5uTt9fwoEpNPk37uPBs2b6dV08Z8/OUGFq1YBUB8UjKJyacJtuuIHZ755masWb9RZ1Y/ExwUmLuN\\nnx2e08bfbaFlcy0JOhmfgFfZsi6r+IozRb2Syksc8B/gcxHpBsQrpdaR09luA14WkcnozmeqUirv\\nV9f3QAdguYjciO6MMkVkpFJqDDBGRNqiD/uwRKj7RWQAem73/cAUoC2wUkTeBVKxKjYRaQT4WO/l\\ni57Z1M96zz2F3B65KHaa9Qb1CKtTk279h+FhszFyYB9WffGVzhvZkq533MYTw0bqvA92xd/Xlw6t\\nIxg67mW+3rSNv/76m1GD+jr1D79JwzDCpA7deg/QbTzkGVZ9+gU+3t7c2q4N9999B48Pfl5r1h/u\\nToB1IiI+MYkAfz+H53NHW4ytqJ6yzHuqXkQS0APir6E7lnPo8Z4kEVmH1pdHWAPnUehq6VWl1JvW\\nwPlKpdQn1qHXfPRN0qWBF5RSG0VkNnAzWni6WSn1ooi0Aaahq7ATwMPoQ72SSqmJ1s3XzwJ3A18A\\n3sAm4D6lVA0RGWOt+xn4FK1sj7bWX/EShNCqN2Ye+mHHv2xF5xDasBlkZnJw29eujlJgqrfoCMCh\\n2C0uTnKNBFaxXWl1epc2Dv+D9lz53RUzFDZFtpO63jGdlGNx207qvtaO76Q+2OTUTqqoj0kZDIbr\\nnOI2JmUwGK5AZoarExQ+ppIyGAxFGlNJGQzuhBuOMZtOymBwJ9zwEgRzuGcwGIo0ppIyGNwId7yk\\nyFRSBoOhSGMqqaKKzQa2YvIdYrOBzYbNt7yrkxSci38BkHHiVxcHuTY8AqtceQMzJmUwGAzOxVRS\\nBoM7YcakDAaDwbmYSspgcCPccaoWU0kZDIYijamkDAZ3ogiMSYnIK0AL9Iy1A5VS31vPhwDL7Dat\\nAQxXSr1zpf2ZTspgMBQaItIOqK2Uaiki9dBTaLcEUEodB9pb25VET9P90dX2aQ73DAY3IjMj0+E/\\nV6Ej8CGAUupHwF9EyuWz3aPAB0qpM1fboamk3IwJ02cRt3cfNmxEDxlIo7A8mvVFS7QCvFMUPe+/\\nj4yMDEZNnMovBw5SqlTJ/2fvvsOjKvO/j78nFZJJhYQSSgDhBkIRpEgTpLnys+0KioCCFAtgAWSl\\ni0IUkCJFAUFApdp4Vt11YUWQ3jvIjQQDoacS0mjJ88eZJJMQAtFMyfB97ZXLmTlTPmHJzfeczJwP\\n40cOp0Z4VfvlnfoRBw4dNirLhw+hQUTd3LwbNjJ34WJL3k706t7NyBs5md+jThp5R71NjWrhdssL\\n8MHirzjw+0njz7jvs9S/L/f11+3cz7xv/4OXpwddWjWl56MPA3D89FkGT/6E3o91zLnNRZUHrBtR\\nYy23Jee7X3+g8908oUxSf4FSqo9Sauptti1RSj1WwO3FXq+ebeeefZaa9flEjh1B5LSPcrZlZmYy\\nYcoMFsz8kGWffsz6TVu4cPES637dxJWUVFYumkfk2JFMmfmxreIVkHcvp07HsOrzhUSOG0XklOl5\\n806eyoLZ01m2cB7rN2428m7YyJWUFFYuWUDkuNFMmTHbbnkBdh45zqnzl1j5/ggmDnyByEUr82Se\\n+NlK5o96jS/fe4v1uw9yIT6RtIyrRH62kgfr17Z9wKws238VzS2nGlZKtQCOaa3zL1wFkkXK/mxW\\nCX9LzXpyQTXrQbk16zt3Ex1zJmfaqlIpjHPnL3Dz5k1bRcybd+duOj78kJG3erV8tfBJ+Pv55eZt\\n1oStO
3YSfTqGBvWMaatK5Uqcu2C/vADbDx2jQzOjC7ZGpQokp6SRkpZuZL6Sgp9vaYID/IzM9Wuz\\n7eBveHl6MH/Ua4TaoS3GCZzDmJyyVQTO57vPYxgdl3dFFqlioJSapZTarZT6XCm1XSkVbtn0uFLq\\nZ6XUAaVUY6XUcCyV8LbIERcfT5DVD0JwUCCx8fE5l1PT0og+HcP1GzfYsXsvcQkJ1KpRnc3bd3Lz\\n5k1ORp8m5uw5EpMu2yLerXnj4gkKyi0DNWrhs/MGkZqaSvTp01y/foMdu/cYeWvWYPO2HZa8p4g5\\nc5bEpCS75AWIS7pMsL85N7O/H7FJyTmXU9OvEn3+Itdv3GTnYU1cUjIe7u6U8vayT8DMLNt/FW4t\\nkN3w1Bg4p7XOX4jbFKOe7q7IMam/rprlqwkQAeyz2palte5o2e0brbV+OrsS3h7Bbqktf2c0oyZ8\\ngJ/ZbNSsZ2XRtlUL9h48RM+XBqNq1qB6taoOO93HLbXw741j1PhII29YRciCtq1asnf/QXr2fwVV\\n8z6qVwt36G/d82f+YHAfxnz8BWaf0oSVK+u4YA6itd6qlNqjlNqKUT03SCnVB7istV5tuVsF4NLd\\nPqcsUn9dI+C/WutM4JBSKtpq23rLf3cCk2wdJDSkLHGWSQSya9Zzf1CaPdCI5Qs+AWDanHmEVTQa\\n6Ie8+lLOfTo+9QxlgnOnG5vnjcuf16qy/IHGLF8038g7+5PcvINeyc37xNN2ywsQGhRIXFLuoZRL\\nCZcJDQrIud4sohZLJw4HYPqy1VQMsV+7MjjH+aS01vkPaRzIt71+UZ5Pdvf+OhPGvxjZsu7isk20\\nat6MNes2ALepWX99mFXN+hZaNGvCseO/M/K99wHYuHU7dWvXws3NPn8tWrVozpp1Rlffkd+OWfL6\\n5uYd/CbxCQlG3o2badGsqZF3/EQj75Zt1K2t7JYXoFXDuqzZvtfIfPI0ocEB+JYulbP9pYmziL+c\\nTFrGVdbvPkjLBnVu91TiLskk9ddFAQ8opUxAbcD69/dtgK8w3n37m+U2m/1ENW5Yn4jaiu59X8Hk\\nZuKdfw7lux/+g5/Zl04Pt+WZp56g7+Ahlpr15wkODCTQ35+szCy69h6At7cXU98bZ6t4BeRtQESd\\n2nTvM8DIO2I4333/o1FZ3r4dz/z9SfoOfMPI++ILBAcFEhjgT1ZWJl2f74u3lxdTI9+1W16ARrVr\\nEFG9Cs+Nmoybm4mx/Z9j9fqtmH1K06l5I7p2bE3/CTONmvW//40gfzNHok4x+fNvOBsbj4e7O2u2\\n7WXW8FcI9PO98wsWlRNMUsVNGoz/Asu+dj0gBKiDcTyqOfA4MAG4AoQDlYHntdaHrCvhC3vu8KpV\\nsqIP7SnsLk4jvP4DAEQfvetjoQ4XXsuYcE7+d+Ud7ulc3Oq3K7Q9+HK7hjb/gQ7YcMCuDcYySf0F\\nWuslSilv4FmtdW+llC9wDDivte5zm8d0sGdGcY+RsyCI/LTWV4GmSqndGAfKx2qtbzg4lhAuQyap\\nYqC1fs3RGYQA5/jtXnGTSUoI4dRkkhLChWRl3vk+JY1MUkIIpyaTlBAuRI5JCSGEnckkJYQLccFB\\nShYpp1ZSatazZdrvvE5/meXP1uRnvw8niz9HFikhXIgckxJCCDuTSUoIF+KCg5RMUkII5yaTlBAu\\nRI5JCSGEnckkJYQLccFBSiYpIYRzk0nKxbw/bSYHDltqy4e9eWtt+aIleHl6GTXrz3a11KxPMWrL\\nPTwZP2o4NcLD7Zz3iFVeq1r4DZus8nawyvthvrz2q4UH+GDeEvYf+x0TJka/2of66r6cbeu27mLu\\niu+MmvW2rej15N9Iz7jKyKkfE5d0mWvXrvNqj6d5+MEHbJIt0wVHKVmkbEgp9TegmtZ6rj1ez6hZ\\nj2HV4gVE/RHNqPciWbV4AWCpLf9wOq
uXLiYwIIABrw+lY7uHOHT0N0vN+qecPnOGyKkfMf+jApvj\\nbZT3DKsWf2rJ+z6rFn+aL+8iS95hVnlTWLloviXvTOZ/9KFd8gLsPHiU6LMXWPVRJFGnzzBq+lxW\\nfRSZm/njRXz38WQC/c0MGPMBHVs2Ze8RTb1aNej/zJOcvRhL35ETbbZIuSJZpGxIa/1fe77etl27\\n6djOUlteLbdm3Wz2NWrLzUbNOsCDTZuwdecu4hMSrWrWK+XUrLu7u9sp76218Ebey7fJm5QzHdo7\\nL8D2fYfo2LKpkblKJZKvpJKSmobZ14fE5Cv4mX0IDvQHoMX99di67xD/6Nwu5/EXYuMpXzbYZvlc\\ncJCSY1LFSSnlqZRarpTaopRap5QarZSaarl9lVJqo1Jqh2XCKnZx8fEEBVrXrAflrS23rlnfs5e4\\n+ARq3ZevttyeNevxCQQFWtWsF1YLv2cvcfGJ1LqvusPyAsQmJhEc4J+bOcCf2MSknMup6RlEnz1v\\nZD5whPjE3Ar47m+O4a1JMxn1Sh+75XUFMkkVr97ABa11D6VUdyDI8lUfKKu1fkgpFQh0sUeYW2rW\\nx49l1Hvv42f2NWrWsdSsHzhIzwEDrWrLHVSzbvWyRt4xlrzmfHkP0XPAIIfXwkMB1fBvDWL0tLmY\\nfX2oVD40TyPsyo8m8ltUNMOnzOZfcz/EZCr+ZqgsF2yLkUWqeDUG1gForVdaevnAqLnyU0p9CawG\\nbFL2Flq2LHHxCTnXL8Xlry1vxPKFxuGxaXPmElbBUls+8OWc+3R8sqv9atbL5quFv+u8VrXwT3az\\nb816maCcyQngUnwiIVav36xBXZZNfw+AaYuWE1YuhMO/n6RMgD8VQstSp0Y4N2/eJOFyMmUCA255\\nfnEr2d0rXjcp4M9Ua52G0WI8H2OKWmiLF2/1YHPWrFsPWGrWy+arLX99aN7a8uaW2vJ3jQO/Rs26\\n/WrLWz3Y7A55rWrhN26hRXNLLfy71rXwdq5Zb9yQtZu2G5l/P0lomSDMPqVztg8Y/T7xSZdJy8hg\\nw/Y9tGxUn92HjrL42x8BiEtMIi09gyB/P5vky8qy/Ze9ySRVvHYB7YGvlVKPARUBlFKNgbpa66VK\\nqR3AJlu8eOOG9Ymoo+je9yVMJjfeeXsY3/3wb6O23LpmHYza8uya9awsur7Qz6hZnzDeFtEKyVub\\n7n1ftuQdmi/v4/Qd/KZRWf6iVS18ViZdX+hvyfuO3fICNI5QRNSsTvc3x+DmZmLcoH58t3YDfr4+\\ndGrVjG6PdqDfyIlGNXz3pwgK8Kf7/3Vm9PS59Bw6joxr1xg7uJ9dF9aSTmrWi5FSygtjSqoKXMco\\nCw0CIoEVgC/GtDVba/1tYc8VXrVKVvThfbYNXEzC6zUCIPrwXgcnuXvhdRoA8MfmnxycpGhM4Q0L\\nPZAV07C2zX+gKx84JjXrJZXW+hrwwm022+Q3ekK4OlmkhHAhrrhnJIuUEC7EBdco+e2eEMK5ySQl\\nhAtxxQ8YyyQlhHBqMkkJ4UJccJCSSUoI4dxkkhLChchbEIS4naxMsjJSHZ3i7mV/LMVHPuTr7GSR\\nEsKFuOAgJcekhBDOTSYpIVyIKx6TkklKCOHUZJISwoVkZTo6QfGTSUoI4dRkkhLChcgxKSGEsDOZ\\npFxMSatZ/2DWXPYfOYbJZGL0G69Sv47K2bZu01bmfr4cL09PunRsR6+nnwTgh7XrWLjsa9zd3Xi9\\nf2/atWxut7xG5nnsP3oMkwlGv15A5i9WGJk7tLXK/AsLl3+Fu7s7r/d7wWaZXXCQurcmKaVUH6XU\\n1Hy3rVRKlb7dY2yUI84Wz2tdsx45dhSRU2fkbMuuLV8wcxrLFnzC+k2buXDxEut+3ZRTsx45biRT\\nPppji2gF5913kOgz51g1fyaRI4Yy8aNP8uadMYdPP5zI0o+nsX7Ldi5
ciiXxcjJzFi1l2dzpzJsy\\ngXWbttotb27ms6ya9xGRbw9l4sy5eTN/9DGfTpnA0jlTWb9lR27mxUtZ9sl05k1+j3Wbt9k1c0l3\\nz09SWuvujs5QXEpazfr2Pfvo2KalkTe8CslXrpCSmorZ15fEy5fxM5sJDjIamVs80Iitu/dRytuL\\nlk0aYfbxwezjw4S3h9g8591nTs6X+f6CM//zTZvlc8XzSd2Li1Q1pdR/gMrADGAcUA9oCUwE0oGL\\nQE8gBPgM8MJoeekPeANLgRZANWCV5fJ5rXVZAKXUN8Ac4ATwpeV1PYHeWusoW31jcfHxRNTO3fXI\\nrlk3m33z1KyHVazAjj17ada4EarmfXy+fCW9n3uWUzFncmrLy5YJtlXMHLHxCUSomrl5AwOJjU/E\\n7OtLcGAgqWnpRMecJaxCOXbs3U+zRg0BSL96lVffHkfylRQG932eFk0a2TxrTuaExHyZA6wyB+TN\\nvO8AzRoZrTTpGVd5dcQ7JF+5wuAX7Zu5pLsXF6laGE3D/sABjMUHYDAwTGu9SSn1D6AMMAGYprX+\\nWSnVBRirtR6glPoJ6As8Aryhtb6ulLrlhYAKwHta6/VKqb7AQGCYLb85ayWvZj1f3tHDGf3BNMxm\\nXypVKJ+zPelyMnPeH8+5ixfp/dpwfvl2qU0qy+8uc+5lk8nEpFFvMXrSNMy+2ZmNbUnJycyJfMfI\\n/Po/+eWbL21Ts+56g9Q9uUht1lpfB+KVUslAFcvtXwPzlFLLgBVa6wtKqZaAUkqNAdyBWMt9PwC2\\nAAe01lsKea0LwCyl1LsY/Xt7bPD95Ch5NetliM2TN56QsrkTXLNGDVj2yXQj77zPCKtQjoyr12hU\\nvy4eHu5UCauIr48PCUlJlAmyY+aExMIzf5ydeRFh5cuRcfUqjepZZy5NQtJlylh2C0Xh7qkD5xb5\\n/63JAtBafwk8DMQBPyilagPXgG5a63Za6zZa639YHuMDmIByt3kNT8t/3wPWaK0fAt4txu+hQCWu\\nZr3ZA6zdYJQ5H9G/E1q2DGYfn5ztA4aNIj7RqFnfsGU7LZs0pnWzB9ixZz+ZmZkkXk4mLT2doAD7\\nnW6lVdPGhWd+azTxlir1DVu307JJIyPzXuvMGQQF+NskX1ZWls2/7O1enKRaKKXcgWCMRuEEAKXU\\nWGCO1vpTpVQoUBfYATwFzFVKtQfKa62XY0xS7wCPKqWe1VqvArKUUtl/W7MPOJQFopRSJuBJjGnM\\nZkpczXr9CCJULbq/8iZuJhPjhg7mu/+sNSrL27am2xNd6DdkpFFZ/nx3ggKNxahzuzY8+/IbAIwZ\\nMsiuleVG5pp0f/VN3ExujBs6yMhs9qXTQ63o9vij9BtqydwrX+ZXLJnfHCg160VwT9WsK6X6YBxH\\n8gbuA6ZgHCyvBzwNvA4kWr56A4HAYqA0xsTVBwgFxmutH1VKlQF+xTjo/hbwd+AoxoH2mYAZmApE\\nA7OBT4EXgeXZB9lvp8TVrGdl8seewvZ8nUu1B1oB8MfuzQ5OUjSm0PBCD2QdCq9m8x/o+tF/2PUA\\n4D21SJUkskjZlixSf569F6l7cXdPCJflikOH7BgLIZyaTFJCuBAXHKRkkhJCODeZpIRwITJJCSGE\\nnckkJYQLycp0vVFKJikhhFOTSUoIF+KCg5QsUqKYmNww+djvg75/2Y0bAGSdOe7gIEVjCg13dAS7\\nk0VKCBci7zgXQgg7k0lKCBfienOUTFJCCCcnk5QQLkQmKSGEsDOZpIRwIfLbPSGEsDOZpFzM+9Nm\\ncuDwYUwmE6OGvUmDiLo5237esJG5i5bg5enF/3XuSK9nu5KZmck7H0zh96iTeHp4Mn7UcGqEhzsk\\n+/ETUQwcOpw+PZ6jV/duebZt3bGT6XPm4u7mxkOtWzJoQD+HZAT44ItvOXAiGhMmRvV+mvo1quZs\\nW
7f7IPNWr8HL04MuLRrT85G2AByPOcfgqQvo3aVdzm224HpzlExSd00p1UcpNfU225YopR4r4Pau\\ntk+Wa+eefZyKiWHV4gVEjh1F5NQZOdsyMzOZ8OF0FsycxrIFn7B+02YuXLzEul83cSUllZWLPiVy\\n3EimfDTHnpFzpKWnM2HKVFo0bVLg9olTpjH7w0msWLyALdt2cOLkSTsnNOw8+junLsSy8r1hTHy5\\nB5Gff5OzLTMzk4mLv2b+26/w5bg3WL/3MBfiE0nLuErkkm94sF4th2Qu6WSRsq0R9nyxbbt207Hd\\nQwDUqBbO5eQrpKSkApCYlIS/2UxwUBBubm482LQJW3fuIvp0DA0i6gBQpVIlzp2/wM2bN2/3Ejbj\\n5enJglkzCA0JuWVbzJmzBAT4U6F8Odzc3GjbuiXbdu62e0aA7UeO06GJUZ1eI6w8yalppKSlA5B4\\nJRU/39IE+/sZf8YRim2HNV6eHsx/+xVCg2z/saFMO3zZm+zuFZFSahZGhdURQAHdLZseV0q9CYRg\\n1FZ1ABoqpb4DngFWAJWAncCzWuvyxZ0tLj6eiNq5de/BQUHExsdjNvsSHBREaloa0adjCKtYgR17\\n9tKscSNUzfv4fPlKej/3LKdizhBz9hyJSZcpWya4kFcqfh4eHnh4FPzXMTY+nmCrhuLg4GBiYs7Y\\nK1oecUnJRFSrnJvFz0zs5SuYfUoT7G8mNf0q0ecvERZShp1Hj9O0bk083N3xcLdp5aJLk0WqaKpZ\\nvpoAEYB151SW1rqjZbdvtNb6aaXU21rrfyil/g8waa1bKKVaYPT72Zz1b3pMJhOTxo9l1Hvv42f2\\npVLFCkAWbVu1YO+Bg/QcMBBV8z6qVwt3/t8QOVE+6yQmk4kPXu3FmPnLMPuUJiykjN2zOtEfTbGR\\nRapoGgH/1VpnAoeUUtFW29Zb/rsTmJTvcXWA7QBa621KqQxbhAstW5a4+ISc65fi4ggpWybnerMH\\nGrF84VwAps2ZS1iFCgAMGfhyzn06PtmVMsG5U4szCA0pS1xcfM71i5diC9wttEuWoADikpJzrl9K\\nvExoYG5lerO6NVk6fggA01d8T8WQMrc8hygaOSZVNCby7pZn3cXl7MdZ32aTgz6tHmzOmnXGWnnk\\nmCa0bFnMvr452/u/PpT4hATS0tNZv3EzLZo35djx3xn5biQAG7dup25t5XQV4JUqViQlNZUz585x\\n48YN1m/aTKsWzR2SpVWD2qzZsR+AI3/EEBoUgG/pUjnbX5r0CfGXr5CWcZX1ew/Tsp663VPZRJYd\\n/mdvMkkVTRTwgFLKBNQGqlptawN8BTwI/Ga5Lfun/RjwPIBS6kHAFxto3LA+EXUU3fu+hMnkxjtv\\nD+O7H/6Nn9lMp4fb8sxTT9B38BBMwEsvvkBwYCCB/v5kZWXR9YV+eHt7MXXCeFtEu6PDR39j8oxZ\\nnD13Hg8Pd9as+4X2bdtQqWJFOrVvx/iRbzNs5FgAunTuSLWqVRySs1Gt6kRUr8xz46bj5mZi7Ivd\\nWP3rdsw+penUtCFd27ek/wcfY8LES092IsjfzJGTp5m8dDVnYxOM723HfmYN7U+gufj/Grjg3p7U\\nrN8tpVQfoB7GgfE6GMejmgOPAxOAK0A4UBl4Xmt9SCm1DvADWgFfA6HALqCn1rpsYa9X4mrWgeij\\nBx2c5O6F16wNwMkfv3BwkqJxa9y50IrzTeUq2fwHus3FM1Kz7oy01kuUUt4Yv5nrrZTyxZiQzmut\\n+9zmMR2srj6VfUEp1dOmYcU9yxVHDuc6+ODktNZXgaZKqd0YB8rHaq1vODiWEC5NJqki0lq/VgzP\\nUeiunhB/lisWMcgkJYRwajJJCeFCHPEWAVuTSUoI4dRkkhLChTjDHKWUmoHxfsEs4A2t9S6rbZUx\\nPsfqBezVWr9yp+eTSUoIUWyUUm
2BmlrrFkA/YFa+u0wDpmmtmwE3lVJ3fFeuLFJCuJCsLNt/3UEH\\n4P8BaK1/A4KUUv4ASik3jE9mfG/ZPkhrffpOTyi7e6L4mOz6RuS/5qpxDqisrWsdHKSIGnd2dII7\\nKQ/ssboea7ktGePTGleAGUqpxsAmrfXIOz2hTFJCuJAsO3wVkSnf5TBgJtAWaGQ5jVEI4pj0AAAg\\nAElEQVShZJESQhSncxiTU7aKwHnL5TjglNY6Smt9E1iHcV62QskiJYQLySTL5l93sBboCmDZpTun\\ntb4CYPkI2UmlVE3LfR8A9J2eUI5JCSGKjdZ6q1Jqj1JqK8a51wZZziByWWu9GngTWGI5iH4I+OFO\\nzymLlBAuxBneJ6W1zl9AcsBq2wmgdVGeT3b3hBBOTSYpIVyIK57DUiYpIYRTk0nKxZT4mvUhb9Gn\\n53P06v5Mnm1bt+9k+pxPLDXrrRj0kuNq1idtOsyBC0mYgJEP1aN+ucCcbcsP/sEP+izuJogIDWTk\\nQ/UA2HU2jiE/7WFih/tpV62czbK54CAlk1RhlFIblFL18t1WTym14S4fH245i6ddlPia9clTadGs\\naYHbJ06Zxuypk1mxZCFbtm/nRJRjatZ3nY3jVFIqK7q1ZkKHhry/8XDOtpRr11m0N4ovn27J0q6t\\niUq4woELiZy+nMqSfSdpVMG+hauuQhYpF1Lia9ZnzyA05NaTlt5Ss96qFdt27irgWWxve0wcHaob\\n71WsEexHcsY1Uq5dB8DTzQ1PdzfSrt/kRmYmGTduEuDtSYiPN7O6NMXPy/Y7LlJp5cKUUu7Ap0B1\\nwBMYZ7WtEkbby1Wsfp2qlPoHMAy4AezWWg+zvCfkUYx32o6w3O9RoIfWOrvWagHwg9b6++L8Hly2\\nZj0unuCg3F2q4OAgYs6ctVe0POLSrlI3NDdLUGlv4lKvYvbyxNvDnYHNatH583WU8nDn0ZoVCQ8y\\nOySnK5FFKlcPjOaXfkqpssAvQHYd8OvASq31TKXU20BDpZQZGAO00FpfVUp9pZRqZbl/FaAlub18\\na4GZSqlSwDWMiqtBtv6GpGbdHnKzpFy7zqe7f+en5x/G18uTvqu3ciz2MrVDAuyWxhXPcS6LVK6W\\nQBulVPYbzUpjnJgLoC7GJAWwAWNSisBYjNYopQACyF2Udmmtsyy3o7W+qZT6EeiC8TmmTVrra8X9\\nDbhszXpovpr12NgCdwvtIcS3FHFpGTnXL6VeJcTXaDCOSkihsr8vQaW9AWhcsQxH7LxIuSI5JpXr\\nGhCptW5n+appuQ3y1qu7Wd1/j9X9G2mtl1tty+8LoBvwBLC8gO1/2T1Ts77RgTXrVUJYe8L4vOzR\\nS0mE+nrjaznWFObvQ1TiFTJuGMf0jlxKomqATcqqb8sJz4Lwl8kklWsH8CSwQikVivEZo2waaIJx\\nnpyHrW6ro5QK1VpfUkq9i3FMq0Ba6/1KqTCMFuNRtvgGSnzN+vSZlpp1D9b8bKlZD6tIp/YPM37U\\n2wwbMQaALo90olrVqnd4RttoVCGYiJBAeny9GTcTjGlbn9W/xeDn5UHHGhXo27gGfb7bioebifsr\\nBNMkrAy//nGRRfuiOJmYwpHYyyw9cJKFT7VwSP6SSGrWLZRSHsA8jF07d2A88DYwGONEXV8BScBB\\noKnWup3lwPkojAPq+4DXgN5APa31W0qpcOAbrXUTy2uMAfy01m/fKU+JrFn/7ZCDk9y98MrGWWuj\\n3h3g4CRF4z54aqFnFvy+TAWb/0A/EX9eatYdwXIaif75bv7J6vIt+xda6++A7/LdvMRqezTGBIZS\\nygS0A+544nkhRC7nOvjgoiwT1W7gf5ZPgQthE/I+KfGnWCaqBxydQ4iSSBYpIVyIKx5ilt09IYRT\\nk0lKCBeSeee7lDgySQkhnJpMUkK4EBc8JCWTlBDCuckk5cyc7DN
0hcoCrl91dIq75+MHgFu3gQ4O\\nUrxc8RMkJeinQAhxL5JJSggX4npzlExSQggnJ5OUEC5EJikhhLAzmaSEcCEySQkhhJ3JJCWEC8l0\\nwfdJySLlYt6f+hEHDllq1ocPubVmfeFivLw8+b/OnejVvZtRsx452ahZ9/Rg/Ki3qVEt3H55p8/i\\nwOGjllr412lQt05u3l83MXfRF0beTh3o9czTRt5JU/k96g8j74i3qBFu3/OdfzB7PvuPHsOEidGv\\nv0z9Orldh+s2bWPulyvw8vSkS/u29Hr6CQB+WPsLC1d8g7u7O6/3e552LZrZNXNJJosUoJTyBDYD\\nx7TWvYvpOcOxOr+5Pezcs5dTp2NY9flCok7+wah3I1n1+ULAUrM+eSqrl39OYEAAAwYPoePDbTl0\\n5ChXUlJYuWQBp2POEPnhDObPmmafvHv3cSrmDKsWzSPqj2hGTZjEqkXzcvN++BGrv1xo5H3jLTq2\\nbcOho8eMWvjP5nL6zFkip81k/owpdskLsHP/QaLPnGPV3BlERZ9m1OQZrJo7IzfzR5/w3WezCfT3\\nZ8DwsXRs0wJvb2/mLFnOtwtnkZaWwezFX9pskXK9OUoWqWwVAO/iWqAcZdvO3XR82FKzXr0al68Y\\nNetms69Rs+7nR3CQ0an3YLMmbN2xk/iERBrUM6atKpUrce6CUbPu7u5u+7y79tCxbRsjb7XwfHkv\\n4+9nzs3b9AG27tpt5M2phQ/j3IWLdssLsH3Pfjq2MZpeaoRXIflKCimpqZh9fUm8nIyf2UxwoNFw\\n3OKB+9m6Zz+lvLxo2eR+zD4+mH18mDD8DbtkdRWySBlmADWUUosBPyAI48/mNa31QaVUFLAA6Aqc\\nwKi26gb8rrXuqZRqCHwMXMc4pU836ydXSrUB3rdsjwEG2KIcNC4unog6tXOuBwcG5q1ZT00l+vRp\\nwipUZMfuPTRr0tioWV+2kt49uhs162fOkpiURNkyZQp5pWLKG5+QtxY+T97AfLXw+yy18DX4fMVX\\n9O7ejVNnztq9Fj42IZGIWjWtMgcQm5CI2deX4MAAUtPTiI45S1iFcuzYd5Bm99cHID3jKq+OGE9y\\nSgqDX+xJiwca2SSfK55PShYpwzDgG+AkRtX6QqVUXWAm0Amj4movMBk4DXyrtW6mlDqtlArE6NJ7\\nTWu9Tyn1HtAT+MHq+WcBHbTWCUqpKRiL2DJbf1PWJ803mUxMem8co8ZH4mc2UymsImRB21Yt2bv/\\nID37v2JVs27rZHeZ951RjJowKW8tfMsH2XvgED1fHoy6rwbVw6s69EO1t1TZjxrG6MkzMPv6UqlC\\nuZw/y6TkZOZMHMe5ixfp/cYIfvn6c0ym4m+Gkpp119cSCFFK9bJc97HattNSnX4Ro2MP4BJGvfpF\\nYLJSygeoiNUCpJQqB9QEvrPUrvsCcbYIHxqSt478Umz+mvXGLF80H4Bpsz8hrKKlZn1QbstWxyee\\ntlvN+i218LFxhJTNrU9v1rgRyxd8bOT9eB5hFcobeV/N7crr+Pdn7VoLH1qmDLEJiTnXL8UlEGI1\\nxTW7vwHL5kwFYNr8xYRVCCXj6jUa1auLh4c7VcIq4utTmoSky5QJCrRb7pJM3ieV1zWMiSi7Ot36\\n6OaN21w2YUxcM7XWbYH5BTznWavnbKq1tsmR3lYtmrNm3S8AHPntGKEh+WrWB7+Zt2a9maVmffxE\\nADZu2WbXmvVWDzZlzS8bjLzHtCVv7r8L/d94i/iERCPvpq20aNaEY8dPMHLCB0bebTuoq2rZtRa+\\nVdPGrP11s5FZnyC0bDBmn9zMA4aPJT4xibT0DDZs3UHLBxrRumljduw9QGZmJomXk0lLzyAowN8m\\n+TLJsvmXvckkldcO4Clgm2V3729a6+l38biyQJRSyhvoAmzP3qC1TlRKoZSqq7U+qpR6DfhVa32w
\\nuMM3btiAiDq16d5nACY3E++MGM533/9o1Ky3b8czf3+SvgPfwGQyGTXrQYEEBviTlZVJ1+f74u3l\\nxdTId4s71u3zNqhPRG1F936vGnmHD+W7H/+Dn6+ZTg8/xDNPPU7f14Yaefv0yq2Fz8yka5+XjLwT\\nxtotL0Dj+nWJqHUf3V8dipubiXFDBvHdT//Dz9eHTg+1ottjf6PfsNGYTPBSz2cICgwAoHPb1jz7\\nyhAAxrzxql0X1pJOatbJfbsA8DBGA3EoxnGo17XWu5VS0RjV6SlKqd1AV611dPZloDPwBhAFLAbm\\nYCxWn2mtmyilWgPTMKaqc8ALWutCzxAXXrVKVvTRA8X9rdpEeN2GkAXRh3Y7OspdC6/XGIA/dm90\\ncJKiMZWrXuiBrEX+ITb/ge6bHGvXmnVZpJyULFK2JYvUn2fvRUp294RwIa44csiOsRDCqckkJYQL\\nccX3SckkJYRwajJJCeFCHPE+JluTSUoI4dRkkhLChcgxKSGEsDOZpETxMAHuJemvk2XkuJbh2BjF\\nzBVP1SKTlBDCqZWkf/qEEHcgx6SEEMLOZJISwoXI+6SEEMLOZJISwoXIMSkhhLAzmaSEcCGu+D4p\\nWaRcTImrWZ/2EQcOHcFkglFvFZD3syV4eXryf490pNezlrzvT+H3qCg8PT0ZP/Kfds0L8MEni9j/\\nm8ZkMjF6YD/q187t4Vu3ZQdzl32Dl6cHXR5uQ6+nuuRsy7h6lcf7v8GrvZ7hH4+0t2vmkkx292xI\\nKbVEKfWYvV7PumY9ctwoIqfkdkhk16wvmD2dZQvnsX7jZi5cvMS6DRtzatYjx41myozZ9oqbm9fy\\n2pEfzsibd8o0FsyaxrKFc1m/cUvevIsXEDl2FFM+sl9egJ0HDhN99hyrZk8mctggJn68MG/mOQv4\\n9P0xLJ0Ryfptu7gQm9teNnfZ1wT4+dk0X2aW7b/sTRYpF3K7mnUgT826m5tbTs169OmYAmvW7Za3\\nXVsjb7VwLicn581rNufLu4vomDM501aVypU4d95+eQG27ztIx1bNjcxVK5OckkpKapqR+XIyfpYm\\nYzc3N1o0bsDWvUYp0MnTZ4g6dYa2zR+wW1ZXIYvUXVJK9VFKLVZK/aCUOqmUek4p9b1S6oRSqrlS\\narpSarNSardSqn++x7orpT5TSq233Mcms35cXDxBQblFmdm15UCemvXr12+wY/ce4hISqFWzBpu3\\n7eDmzZucjD6VU7NuD3HxCQRZFWQGBwXlzWupWc+T9758eS016/YSm5BEsFVnXnCAP7GJRlmoUbOe\\nTvSZc1y/cYMd+w8Rn2j8WU6et4QRr7xo83zSuydqAm2A/sBIoBHQB3gROKq1HqqUKo1RbbXQ6nE9\\nMOrb+ymlygK/AA1sHbbE1aznryx/dyyj3o201KxXhKws2rZqwd4DB+k54FXUfdl5nahm/Z+vM3rq\\nHMy+PlQqX46srCz+39r13F9XUalCOYflLMlkkSqa3Zaq9fPAQa31TUvtujcQrJTaitGtF5LvcS2B\\nNpb+PYDSSikvrfW14gxX4mrWQ8oSF2+VN66AvJ/NuzXvwJet8na1c816MLGJuZPmpfhEQoKtatYb\\n1mPZR+8DMG3hl4SVD+XnzTuIOX+RDdt3cyEuHi9PD8qXLUPLBxoWez5X/O2e7O4Vze2q1sOB9kBb\\nrXU7IH/x5zUg0qpqvWZxL1BQEmvWm7Fm3XpLXk1o2Xx5XxuSm3fTlty871rybt1G3dp2rllvcj9r\\nN24zMv8eRWiZIMw+pXO2Dxj5Xm7N+vZdtGzckBlj3+KbTz5k1ZzJdH20I6/2esYmC5SrkkmqeDQB\\nvtdaX1dKPQG4K6W8rLbvAJ4EViilQoE3tdajijtEiatZb9iAiNq16f7iAEwmN94Z8Rbfff9v/My+\\nuXkHvWlUlvexypuZRdcX+u
Lt5c3UiePtlhegcURtImpWp/vrI3AzmRj3+kt8t+YXo2a99YN069KJ\\nfiPexYSJl557miCr41f24IrvOJcG47uklOqDUbX+luVtBV211n0sl/sBYUA68P8wdu+SMaravwH+\\nC8wD6lpuG6+1/qmw1ytxDcZA9JH9Dk5y98Lr1Afgj63/c3CSojFVrltoe3BkqWCb/0CPzkiQBmNn\\npLVeYnX5R+DH/JetzOBW/Qu4TYhiJcekhBDCzmSSEsKFZLrg4RuZpIQQTk0mKSFciByTEkIIO5NJ\\nSggX4orvk5JJSgjh1GSSEsKFuOIxKVmkRPFxc3d0grt33fjoZObxPQ4OUjTuleve+U4uRhYpIVyI\\nK37MTY5JCSGcmkxSQrgQVzwmJZOUEMKpySQlhAuR90kJIYSdySQlhAuRY1JCCGFnMkm5sOMnohg4\\n9J/06dGdXt275dm2dcdOps+Zh7ubGw+1bsmgAX0dlDLX8RNRDBzyFn16Pkev7s/k2bZ1+06mz/nE\\nkrcVg17q56CUMOmbtRz44ywmTIzs1pn64RVzti3/dTc/7DyEu5uJiCoVGdmtM5eSrjBm6Q9cu36T\\nm1mZjOjamYgqFWySTc4nJfJQSpmVUtFFfEw7pdTvSqlud773n5eWns6EKdNo0bRJgdsnTpnO7A8/\\nYMXiT9mybQcnTv5hyzh3lJaezoTJU2nRrGmB2ydOmcbsqZNZsWQhW7Zv50TUSTsnNOw6fopTlxJY\\nMfxFJvR6jPe/XpOzLSX9Kov+t40vh/Zm6bA+RJ2P5cAfZ1iybgcdGiqWDHmeoU+2Z+b3622WL9MO\\nX/Ymi5T9PQR8rLX+2pYv4uXpyYJZ0wkNKXvLtpgzZwkI8KdC+XK4ubnRtnVLtu3cZcs4d+Tl6cmC\\n2TPuLm+rVg7Lu13/QYeGCoAaFcqSnJZBSrrRYObp4Y6nhztpV69x42YmGdevE+BTmiBzaZJS0wFI\\nTssg0NfHIdlLKtndKyKllD/wLVAK2Gy5rQ3wPnAdiAEGYPyj8zlQCfAFxgOngL7AdaXUea31Klvl\\n9PDwwMOj4P97Y+PjCbauYw8OIibmrK2i3JVC88bFE2xdxx4cRMwZx+SNS06lrtWuWpDZh7jkFMyl\\nvfH29GBglzZ0HjeHUp6ePNqkLuHlytC7fXOenbKI73ccIiXjKkuH9bZZPnkLggDoBRzWWrcBsjuc\\nZgFPaq3bAxeBbkAwsFZr3RZ4BnhXa30IWALMtOUCVWQl7TiGU+XNzZKSfpVP12zhp/EDWTthMIei\\nz3LszEUW/byNRxrX5d/vvMq7Pf6PD7/92YF5Sx5ZpIquLrDVcnkDUA6oCXynlNoAPIzRwZcINFVK\\nbcGYqMrc8kwOkr+O/eKl2AJ3s5xFaGi+vLGOyxsSYCYuOSXn+qWkFEICzABEXYijcpkggsw+eHm4\\n07hGFY6cPs/eqDO0iagBQMva1Th8+rzN8skxKQFgIvf/KzeMCvWzVhXqTbXWU4AeGNNUG+Dvjola\\nsEoVK5KSmsqZc+e4ceMG6zdtoVWL5o6OdVu35N242WF5W9Wpztp9xwA4evo8oYFmfEt5AxBWJoCo\\ni3FkXLsOwJHT56kaGkyVkCAO/mHsnh46dY6qocEOyV5SyTGpotMYterfYkxNiQBKqbpa66NKqdeA\\nX4GywB9a60yl1D8Ar9s9oS0cPnqMyTNmcvbceTw8PFiz7hfat21DpYoV6dS+HeNH/pNhI8cB0KVz\\nR6pVrWLPeAXk/Y3J063y/mzJG1aRTu0fZvyotxk2YoyR95FOVKta1SE5G9WoTESVCvT4cAlubjDm\\n2UdZve0AfqW96Xh/bfp2fJA+Hy3Fw93E/dUq0+S+KlQNCWLM0h/5796jAIzq9ojN8rniWxCkZr2I\\nlFKBwGqMaWoz8ALwPDANY6o6Z7mtAvA9EAssAt7AaDp2A+K
01nMKe50SWbP+2yEHJ7l74dXvAyBq\\n2XQHJyka9w7PF1pxPtDkb/Mf6E+ykgvNoJSaATyIccDuDa31Lqtt0Ri/XLppuamn1rrQ34LIJFVE\\nWuskjAkq2zuW/+bf/4gGGlhdX2bDWEIAjv9YjFKqLVBTa91CKVUH4x/oFvnu9qjWOuXWRxdMjkkJ\\nIYpTB+D/AWitfwOCLG/b+dNkkhLChcy7w66YHZQHrE8cH2u5LdnqtnlKqXCMwyUjtdaF7qLKJCWE\\nsKX8i+Y4YCjQDqgHPH2nJ5BJSghRnM5hTE7ZKgI5bwzTWn+RfVkp9R+gPvBNYU8ok5QQojitBboC\\nKKUaA+e01lcs1wOUUmuUUtlvx2kLHL7TE8okJYQoNlrrrUqpPUqprRi/bByklOoDXNZar7ZMT9uV\\nUunAPu4wRYEsUkKIYqa1HpHvpgNW22YCM4vyfLK7J4RwajJJOTVH/zbZhZmMf59NvoF3uKNwNJmk\\nhBBOTRYpIYRTk0VKCOHUZJESQjg1WaSEEE5NFikhhFOTRUoI4dRkkRJCODV5M6cLM2rWh9Onx3O3\\nqVmfa1Wz7rja8mwlpWb9g2X/4kDUaUwmGNXzSepXzz0//LKft/DD1r1GzXq1yozq+SRpV68y8tOV\\nxCenUNrbi/f7P0tI4F86D9w9RSapu6SU6qOUmprvtmhL1foIpVT+U6Te7nmmWj5waVNGzfrUQmrW\\npzH7w0msWLzAUrPumNrybCWlZn3nsShOXYxj5bjXmNjvGSKX/itnW0p6Bot+2sDS0QNZNmYwUWcv\\nsv/EKb5av4PKoWVYOnoQLz/egdnfrSnkFUR+skgVA631JK31NkfnsGbUrM8gNCTklm0F16zvdkDK\\nXCWmZv3o73RoXA+AGhXLkZyWRkp6BgCe7u54uruTlnGNGzdvknHtGgFmH05djKWBZdpqoqqz9/c/\\nHJK9pJLdvaKpZjnVRGVgRvaNSqklGKecKAu0BkIABXyotf5MKdULeBs4A6RzF+fQ+auKVrMeTEzM\\nGVtHKlSJqVlPukJEeKXcLH5mYi9fwVy6FN5engx6qjOdh7+Pt6cnXR68n2rlQ6hVqQK/HjxG56YN\\n2HksinNxiQ7JXlLJIlU0tYDGgD/G6SduFnCf+kBLjFbjlUqpRcD7GF19ieQ9/7NzKGm1Zk6U17oS\\nLiU9g/k/rOOnySPwLe3Ni5Pmcez0OZ5u2wwdc56eE+fQtHYNgv3NDkxc8sgiVTSbtdbXgXilVDJQ\\nUKPmNq31TaXUGSAAo179itb6EoCldt2hCq5Zv3W30Fk4U816aJA/cZev5Fy/lJRMaIAfAFHnLlI5\\nNJggP18AHqhVnSPRZ6hdpSLj+xin8k7NuMq6vTYfpF2KHJMqmvz/hBf0T/oNq8sm8taygxP8md9a\\ns+642vK74VQ16/UUa3YdBOBI9BlCA/3xLV0KgLCywZw8dymnZv1wdAxVy5Xl1wO/MfPb/wLww9Y9\\nPNSgtkOyl1QySRVNC6WUOxAM+AIJd/GYeCDA0nycCrQCbH6Q/fDR35g8Y5altty9gJr1txk2ciwg\\nNetF0ahmOBHhlXhuwmzcTCbGvvAPVm/ahbl0KTo1qU/fLu3oPWkuHm5u3F8znCaqOhnXrrN83Rae\\nfW8WAb4+THu1p0Oyl1RSs36XLG8beATwBu4DpgATMWp55pB74Lye1votpZQZOKy1DldK9cWoWY/G\\nOHD+X631ksJez6hZP2ibb6aYhdc1ippLVM16jVoAnFy90MFJisbtwcfvuTMhyiLlpGSRsi1ZpEoO\\nhx8fEUKIwsgiJYRwarJICSGcmixSQginJouUEMKpySIlhHBqskgJIZyaLFJCCKcmi5QQwqnJZ/fE\\nvemm8SHgrHOOPSOpuDOZpIQQTk0WKSGEU5NFSgjh1GSREkI4NVmkhBBOTRYpIYRTk0VKCOHU
ZJES\\nQjg1eTOnCzt+IoqBQ4fTp8dz9OreLc+2rTt2Mn3OXNzd3HiodUsGDejnoJS5jp+IYuCQt+jT8zl6\\ndX8mz7at23cyfc4nlrytGPSS4/JO+nEzB05fxGSCkY+1pn7lcjnblm87xA/7juPuZiIiLJSRj7dm\\n9Z5jzP7fDioHBwDQomYlXnm4iaPilziySFkopcoD72qtX1ZKRWNVsKC1/rEIz3M/8Het9Tu32T4e\\niNNaz/nLoQuRlp7OhClTadG04B+GiVOm8dnHsygXGkKv/q/wSIeHua96dVtGKlRaejoTJk+lRbOm\\nBW6fOGUan32SnfdlI28N++fddfIsp+Ius2Lg00RdSmDMN+tZMdDo1EvJuMaijfv571s98XB3o/9n\\n33Pg9AUA/tbgPv7ZpZXd87oC2d2z0Fpf0Fq/XAzPs/92C5Q9eXl6smDWjAJLP2POnCUgwJ8K5cvh\\n5uZG29Yt2bZztwNS5vLy9GTB7BkFln7ekrdVK7bt3OWAlLA96iwdIqoBUCM0mOSMq6RkXAPA090N\\nT3c30q5d58bNTDKu3yDA0skn/rx7apKy1FK1xaieigBGA88BdYGewMda61tGD0vX3qdAdcATGKe1\\n/kUptQH4H9De8pyPW+4zWGvdVSk1DOiK8Y/Bf7TW79r0G7Ti4eGBh0fB//fGxscTHBSUcz04OJiY\\nmDP2ilagQvPGxRMcFJhzPTg4iJgzZ+0VLY+4K2nUDctd+IN8SxN3JQ1zKS+8PT0Y2KEpnT9cSilP\\nDx5tcB/hIYHsO32B3SfP8dKiH7iemcnwLi2pW9F5G6OdzT21SFnUBNoA/YGRQCOgj+Xy7fQAzmut\\n+ymlygK/AA0s25K11h2UUpOAfwD78z22NUaD8Uml1Ixi+y6KU0mrNXOmvFZZUjKu8emGPfw0rAe+\\n3l70Xfgvjp2Po2HlcgT7lqJt7XD2n7rAyK/W8a83uzswdMlyL+7u7dZaZwHngYNa65vARSCgkMe0\\nBJ6yTE7fAKWVUl6WbZss/z1TwHOkAb8C6zEmreBi+Q7+otCQssTFxedcv3gptsDdQmcRGpovb2xs\\ngbuF9hDi70PclbSc65eS0wjx9wEg6lIilYP9CfItjZeHO43DK3DkbCzVQ4NoWzscgPurlichNZ2b\\nmZmOiF8i3YuL1I3bXD5VyGOuAZFa63aWr5pa62sFPEdOcaNSqiowFPib1rrdHZ7fripVrEhKaipn\\nzp3jxo0brN+0mVYtmjs61m3dknej4/K2qlmZtYejADh6NpZQfx98vY1/r8KC/Ii6lEjGdeOvxJGz\\nsVQtE8Bnv+7j3/t/B+D3C/EE+5bG3e1e/NH7c+7F3b0/YwfwJLBCKRUKvKm1HnWHx5QFLmmtU5RS\\njYGqgNcdHlNsDh/9jckzZnH23Hk8PNxZs+4X2rdtQ6WKFenUvh3jR77NsJFjAejSuSPVqlaxV7Tb\\n550+05LXgzU/W/KGVaRT+4cZP+ptho0YY+R9pBPVqlZ1SM5GVSsQERZCj7nf4jEpG38AACAASURB\\nVGYyMebJh1i95xh+pbzoGFGdvg81os+Cf+HhZuL+quVpUq0ilYL9GfHVz6zaeYSbmZlMePphh2Qv\\nqWSRujtfAe2VUlsBd2D8XTxmP5CilNoCbAbmA59YLttcvbp1+HLB3Ntub/pAI1Z9/pk9otyVenXr\\n8OXCebfd3vSBxqz6YpEdE93e0L+1yHO9doXcXc9nm0fwbPOIPNvLB5hZMuApu2RzRaYsZzoIKXKE\\nV62SFX30oKNj3JXwusbvEKJ/O+TgJHcvPNx4G0HUPIe/W6RI3P/xhunO93ItsmMshHBqskgJIZya\\nLFJCCKcmi5QQwqnJIiWEcGqySAkhnJosUkIIpyaLlBDCqck7zsW9KfsDvpeTHJtD3JFMUkIIpyaL\\nlBDCqckiJYRwarJICS
GcmixSQginJouUEMKpySIlhHBqskgJIZyaLFIu7PiJKDo+8Q+Wrvz6lm1b\\nd+yk6/Mv8mzvfny8wDlOI3z8RBQdH/87S1d+dcu2rdt30rVXH559oS8ff+rYvJN+3sNzX6yhx5dr\\nOXQ+Ps+25XuO89wXa+i19H988PMeAOJTM3jpq/X0Wf4zPb9cy4FzcY6IXWLJIuWi7qZmffaHk1ix\\neAFbtu3gxMmTdk6Y193UrM+eOpkVSxayZft2TkQ5Ju+u0xc5lXiFFS88woRHm/P+/3Kbn1OuXmfR\\njt/4slcnlvbqRFTcZQ6cjeOHI3/wREQ1lvToyJttGzJ7Y8k4LbSzkEXKRUnNum1sj75Ih1qVAKhR\\nNoDkjOukXL0OWNes3+BGZiYZN24SUNqLPs3q8FhEOADnk9Mo5+fjkOwl1T312T2l1A6gh9Y6SilV\\nCfgXcBCjGt0boz59rVIqGqhnqaOaChy2PEVrIARQwIda68+UUs8D/wRigDiMduMvuX0t+2EArfVg\\nW36vUrNuG3Gp6dQtn9vxGuTjTVxqOmZvT7w93BnYuh6d531PKQ93Hq1TlfBgfwBiU9IZ9M2vpF67\\nweLn2jske0l1r01SXwLPWi4/gbFIZWit22JUpM+5w+PrW+73FPCaUsoN+ADoCHTDqG+H3Fr2hy33\\n/cjqOQ7beoEqspLWGOSkeVOuXufTbUf56aXHWPvqExw6H8+xi4kAhJhL81Wfv/HPDo0Y9e/tDk5a\\nstxri9QKjEUG4DGgMrABQGt9DriqlCqsCn2bpZY9u1K9LJCstb6otU4F1lnuV1gt+87i+3b+HKlZ\\n//NCzKWJS03PuX7pShohvqUBiIq/TOUAX4J8SuHl7k7jSiEcuZDArtMXuZxhFF63rRHGUcvCJe7O\\nPbVIaa3jgTNKqaYY33saVtXoGA3DmYD1P9WeVpfzV6qbLPfPlv24wmrZr+FgUrP+57WqVoG1OgaA\\noxcSCPXzwdfb+CsSFuBLVHxybs36hQSqBvvxPx3Dvw4ZB/qPX0qivByTKpJ76piUxZfAxxjHjDKA\\nh4GVSqnKQKbWOkkplQxUUEqdBB4E9t3mueKBMkqpIMtztQO28Odq2YuV1KzbRqNKIUSUD6bHl2uN\\nmvVOTVh98CR+3p50VJXp27wOfVasw8PNjfvDytKkcig1yvgz8t/b+Z+O4drNTN55pODfYIqC3XMN\\nxpbdrgsYB7VTgHlADYwpaqTWeqNSagAwDNAYC9FGy8Praa3fUkqZMY4thSulBgIDgd8xFqr/YOxW\\nzgPqYqll11r/ZNn9G6y1zj4Qf1vSYGxb4VWMRS7qwyEOTlI07i++c881GN+Lk1Qr4AetdfYpGfvn\\nv4PWegGw4HZPoLVOAcItVy8BD2mtE5RSa4AorfWN2zxvu78WXYh7zz21SCml3gUeAZ4uxqf1AX5R\\nSqUC+7XWW4vxuYW4591Ti5TW+h3gnWJ+zi+AL4rzOYUQue6p3+4JIUoeWaSEEE5NFikhhFOTRUoI\\n4dRkkRJCODVZpIQQTu2eeguCEDm8SgFgatjCwUHEncgkJYRwarJICSGcmixSQginJouUEMKpySIl\\nhHBqskgJIZyaLFJCCKcmi5QQwqnJIuXCpGbdNj744lu6j5vGc+OmcyjqVJ5t63YfpNvoD+k5fgbL\\n1vyac/vxmHN0fuPdPLeJuyOLlA0ppaIt50O3vu0xpdQSW7+21Kzbxs6jv3PqQiwr3xvGxJd7EPn5\\nNznbMjMzmbj4a+a//QpfjnuD9XsPcyE+kbSMq0Qu+YYH69VySOaSThYpFyU167ax/chxOjQxiidq\\nhJUnOTWNlDSjhy/xSip+vqUJ9vfDzc2NByMU2w5rvDw9mP/2K4QGBTgkc0knn90rJkopT3Kr1b2B\\ncVbb6mOcYjgBiLJHHqlZt424pGQiqlXOzeJnJvbyFcw
+pQn2N5OafpXo85cICynDzqPHaVq3Jh7u\\n7ni4uzskryuQRar4PIelsl0pVRFLM7LFWIxaq38ppeY6JF1hSlqtmRPltU5iMpn44NVejJm/DLNP\\nacJCyjhV1pJKdveKTxPyVbYD2ZXtdYHsFpkN9g6Wn9Ss/4UsQQHEJSXnXL+UeJnQQP+c683q1mTp\\n+CHM++cr+PmUpmJIGUfEdCmySBWfLAqubIe8dewO/zOXmvU/r1WD2qzZsR+AI3/EEBoUgG/pUjnb\\nX5r0CfGXr5CWcZX1ew/Tsp5ySE5XIrt7xWcX+SrbgewCUo0xaa2x3MfmpGbdNhrVqk5E9co8N246\\nbm4mxr7YjdW/bsfsU5pOTRvStX1L+n/wMSZMvPRkJ4L8zRw5eZrJS1dzNjbB+P9ix35mDe1PoNnX\\nId9DSXPP1azbilLKg3yV7RgHy+sBtYDFwFngJGDWWvcp7PmkZt22wu8zJpyTP5asykS3xp2lZl38\\nObepVg+3/Hcv0NCugYRwEQ4/PiKEEIWRRUoI4dRkkRJCODVZpIQQTk0WKSGEU5NFSgjh1GSREkI4\\nNVmkhBBOTd7M6cxMJejNxVlZkJHq6BR3z8MTAFPl2g4OIu5EJikhhFOTRUoI4dRkkRJCODVZpIQQ\\nTk0WKSGEU5NFSgjh1GSREkI4NVmkhBBOTd7M6cKOn4hi4JC36NPzOXp1fybPtq3bdzJ9zie4u7nx\\nUOtWDHqpn0Myvv/Rxxw4chQTJkYNGUyD/9/efYdHUa9tHP9uGpBCGgm9Cz+lKQiRKiLFcixHBQXh\\nIGI5gngUy6sUEQVFFBsCApaDggIWbMdDUQQbBJQO6sMhFOkkISSk0ZL3j5kkCyQYkGRnkudzXVzs\\n7uzM3hv04TfDJneTgg9XfvP9j7wxYxZBgYH8rduV9Ot1Ex998RVfLPg6/zkbfxfWfDu/VDOPm/gG\\nazf9hsfjYcSDg2l+UUHZwuIflvHGu+8TFBjItd2uoN8tf+fj/8zn8wXf5D9nk2xm9ddflmpmN9Mh\\ndZ7Y1ekfA8eB+iLi03694tSWvz1lIlVjY+h39z+5qmsXLmjYoFQzrly9lh07dzH3zckkbN/B8Gdf\\nYO6bkwGrsnzMSxP5dMZ0IsIrc8/Dj9Otc0d63fA3et3wt/z95y9eWrqZ16xj+67dzJ020co87iXm\\nTptYkPmVScx7e4qV+dHhdOvUgZ7XXUPP667J33/Bt9+Vama309O980xEFvh6QIE7asuX/7Kabp07\\nAtCwXl1S0w6TnmF9a03KoVQqh4YSFRlhVZa3bsWyn1edtP/kd95j8MD+pZo5ftUaunVqn5857XB6\\nQebUVMJCQ/Izt7u0Jct+WX3S/lNmzGLQgH6lmtntyv1KyhgzAOgMVAGaAiOw2oibAH2BySLS2n7u\\nL0BPrPaXsUAWsN9+nvfxmgGTgJlYtertgTeAFsBl9jEnl+T7ckNtedLBgzS9sHFBjsgIEpMPEhpi\\n/Y+ekZnJ9p27qFm9GitWrSWu1SX5z13/6+9UrxpLTHRUYYcuMYnJKTQ1XpkjwklMTrEyR0SQkZlV\\nkHn1OuJatsh/7obfhGqxMaWe2e3K/ZCyNQI6YbW9DANaAgPs24UZAjwiIj8YY24GiqqpvQT4O1aT\\n8SagPlAR+AQo0SF1VhxSa+Zdr+bxeHj+yScY/uwLhIWEUKtGtZNyfvzFV9z0t6t9EfMkp2Ue8Rgj\\nxr1EaGgItapXO+lL+9GX87npmqt8kNLd9HTP8ouI5AJ7gfUicgJrhRRexPM/AqYaY4YDa0RkXxHP\\nSxCRZPu4B0Rk958ct1Q4pbY8tkoVkpIP5t8/kJRMTHTBvI9rdQkfTJ3ItJfGERYSSs3q1fK3rViz\\njpbNm5ZqXoDYKtEknpq5SsHKKK7lxbw/5RWmvTCW0NAQalavmr9t5Zp1tGzepFTzlgU6
pCzHi7h9\\nqkAAEZmJ1UScBHxpjCnq530UdVyf/gwWp9SWd4hrzcIl3wPWv3jFVokmNCQ4f/vdQx8n+WAKmVlZ\\nLPlpGe3aXArA/sQkQipVJCgw0AeZL2XR0h/szP+zMgcXZL7nkeEkp1iZl/4UT/vWrazMSUkEV6rk\\nk8xup6d7Z5YGXGCM8QBVsdqJMcY8CUwSkenGmFis61eO4oba8lYtmtH0wsb0vmcIHj8PTz36IPO+\\nWkBYSAjdr+jErTf+jYEPPWZVlve/nagIawGamJxMVGRkqecFaNW8KU1NI3rf9yB+Hg+jHn6Aef9d\\naGXu3JFeN1zDXUOfwOPxcO8/ehOZlznpINFe1wFV8ZX7mvW8C90i8qgx5jqgp4gMyLsN5ALNgXVY\\nF8z/gXWh/V9Aiv3rDmAK1kcQqlBw4fxjEWltjAkFNopIPe/bZ8pVr26dXLfUlte7qDnk5rJ97Qpf\\nRym2epdYK8dtq37ycZKz44mp46KfhHh+lPsh5VQ6pEqWDin30GtSSilH0yGllHI0HVJKKUfTIaWU\\ncjQdUkopR9MhpZRyNB1SSilH0yGllHI0/bYYdX54PFAxxNcpii/jMAA586b6OMjZ8f/nc76OUOp0\\nJaWUcjQdUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhVYZt3pJAt+tvYtac\\nD0/btix+JT37DeC2/gOZPP1tH6Q7nVvyPr90HX1mL+H22UvYsO/gSds+WJtAn9lL6DdnKeOWrAPg\\neE4Owxb8TL85S+n9wRJW7U7yRWzX0iFVRhWnZv31CeOZPeMtfoqPZ0vC1lJOeDK35P15ZyI7UtKZ\\n3acLY3pcynP2IAJIP3KMd37ZzMzbOjOr9xUkHExj3Z5kvvj1DyoFBjCr9xWM6XEpLyxd75PsbqVD\\nqoxyQ826N7fkjf/jAF0vqAFAw+jKpGUfJf3IMQAC/f0I9PMj8+hxjufkkH3sBOGVgrj+ojo83tlq\\nMo4KDuJQ9lGfZHcr/d69UxhjVgC3i0iCMaYW8DmwHmgAVABGicgiY8x2rJaZdGPMBGCjfYiOQAxg\\ngBdF5G1jzD+A/wN2YnX1fSsiM0ryfbihZt2bW/ImZWbTpGpBnVZkcAWSMrMJrRBIhQB/Bre7iB5v\\nL6BigD/XXFibepFhJ+3/3uot/O3C2qUd29V0JXW6mcBt9u0bsIZUtoh0Bm7Gqqo6k+b28/4OPGCM\\n8QPGAd2AXlh17s7itsYgJ+X1ipJ+5BjTV/7O/IFXsejua9iw9yC/Jx7K3/7B2gR+23+IQW0v8kFQ\\n99IhdbrZWEMG4DqgNrAUQET2AEeMMVGF7wrAcrumfRdWnXoVIE1E9otIBrC4pIIXl1Nq1ovLSXlj\\nQiqRlJmdf/9ARhYxIRUBSDh4mNrhIURWqkCQvx+talZh035rSH2yYRtLE/by+o3tCPTX/+3Ohn61\\nTiEiycAuY0wbrK9PJifXogcBOZz0dyje3dmn1ql77Ofn8fkywCk168XlpLwd6lVl0WbrVPPX/SnE\\nhlQiJMj6469ZOZiE5MNkHzsBwKb9KdSNCGXnoXTmrt/Gaze0pUKAv09yu5lekyrcTGAyMB3IBroA\\nc4wxtYEcETlkjEkDqhtjtgJtgTVFHCsZiDbGRNrHugIo8UZKN9SsuzFvyxrRNK0awe2zl+Dn8TCy\\n6yV8umk7YUGBdGtUk4FtGjPgo+8J8PNwSY1oWteqwis/buRQ9hHu+7Tgj/3NWzoRpCuqYtEG40IY\\nY4KAfVgXy9OBqUBDrFXUMBH53hhzD/AIIFiD6Ht797zKdu9q9cHAYOB/WIPqvyIy80wZXNdgDLgl\\nL0C92taQS3husI+TnB3/fz5X7hqMdSVVuA7AlyKSd9Xz7lOfICJvAm8WdQARSQfq2XcPAJeLyEFj\\nzEIg4fzGVars0i
F1CmPM08BVwC3n8bDBwLfGmAxgrYgsO4/HVqpM0yF1ChF5CnjqPB/zPeC983lM\\npcoLvXKnlHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH048gqPIpx/r+OlIOnvl5yud0JaWU\\ncjQdUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SZdjmLQl0u/4m\\nZs358LRty+JX0rPfAG7rP5DJ09/2QbrTuS0vgKdrL/z+8X/49XsMqp1cDuFp1dna1vdRPF17+Sih\\n++mQOkfGmO122YL3Y9cZY2b4KNJJMrOyGDN+Au3i2hS6fewLL/H6hPHMnvEWP8XHsyVhayknPJnb\\n8gJQuxGeyBhyZr5AzvyZ+HW/rWBbUEU8l/UgZ9YEct6fgKdKdahR33dZXUyHVBkVFBjIm6+/UmiJ\\n5s5duwkPr0z1alXx8/Ojc4cOLF/5sw9SFnBbXgBPvQvJ3bzOupO8DyoGQ5BVFMqJ49avoArg8YOA\\nIMjK8FlWN9NvMC4GY0wgVgdfA6ACMMprW3Osn19+EK8WGGPM/cDtWMWgn4nIS8aY0fYx6gNX2E3H\\nJSIgIICAgML/eBOTkomKjMi/HxUVyc5du0sqSrG4LS8AIZXJ3fdHwf3MwxBSGY5mw4nj5P74FX73\\njYXjx8j97WdIOeC7rC6mK6ni6QNki0hnrAr2SV7bngRGi0hX4ASAMaY+0BPoCFwO3GKMqWM/P0hE\\nOpXkgDprbutedGxer0q8oIp42l1NzvRR5LwxAk/1+hBb03fRXEyHVPG0BpYCiMge4AgQZW9rAuRV\\nVC21f48DGgFL7F9hFHTwrSzpsH8mNrYKSUnJ+ff3JyYWeprlFI7Nm56KJ6Rywf2wcMhItW5HV4PU\\nJOsUL+cEubu24Knm25Zot9IhVTy5nPTXJEFYp3HYj+fdzvt6HgW+EpEr7F/NReR7r20+VatGDdIz\\nMti1Zw/Hjx9nyfc/0qHdZb6OVSSn5s3d9ise08q6U7U2HE6Fo0es+6nJ1qAKCATAU60uuQf1dO9c\\n6DWp4vkZ6ALMMcbUxhpKee3GgrXSWmg/B2AVMN4YEwxkAa8CT5Rm4I2//sb4l19j9569BAQEsPCb\\nb7mycydq1axB9yu7MHr44zzyxEgArr2qO/Xr+vZvebflBWD3VnL37bA+fpCbS87Xs/E0b0fukSzY\\nvJbcFV/j12co5OSQu3sr7Nri68Su5Ml17Pm9cxhjAoCpQEOsVdQwrIvlzYDGwL+B3cBWIFREBhhj\\nBgMDsa5TfSYi4+wL50kiMun0VzlZvbp1crf/tqEk3s55V++i5gC4JS9AvZq1AEh4vK+Pk5wd/yem\\nev78WWWLrqSKQUSOA3ef8nA9+/fVwMWF7DMFmHLKY6NLIJ5SZZpek1JKOZoOKaWUo+mQUko5mg4p\\npZSj6ZBSSjmaDimllKPpkFJKOZoOKaWUo+mHOVX5FBIGgN/NA30cRP0ZXUkppRxNh5RSytF0SCml\\nHE2HlFLK0XRIKaUcTYeUUsrRdEgppRxNh5RSytH0w5xl2OYtCQwe+igD+vahX+9bT9q2LH4lL0+a\\ngr+fH5d37MD9997lo5QF3JJ33JuzWCsJeDww4p5/0Lxxg/xti+NX8cbczwkKDOTay9vS77ruZGUf\\nYdir00k6lMrRY8cYdNvf6RLX0mf53UaH1HlgjLkaq/BzPvCxiLQ+ZXuSiJRqB1NxasvfnjKRqrEx\\n9Lv7n1zVtQsXNGxQ6HNLg1vyrtzwG9v37GfuhKdI2Lmb4a+9xdwJTwGQk5PDmGnvMe/VMUSEhXLP\\n6Al0a3spq3/bTLNG9bn7luvYfSCJgU+O1yF1FvR07zwQkQUi8oavc3hzW225W/LGr/uVbm0vBaBh\\n7ZqkpWeQnpkFQEraYcJCgokKr4yfnx/tLm7CsrU
bubZTW+6+5ToA9iUmU61KpE+yu5WupM6B3UY8\\nC6sJJgD4BqsAdJLXc64BHgCut+8/A/QAkoHrRSSHEuS22nK35E08dIimF9QryBIeRmLKIUKDKxEV\\nXpmMrGy279lHzdgqrFj/G3HNL8p/bu/HnmZ/UgpTRz3sg+TupSupc9MT+FpEugAPYjUa5zPGXIBV\\nv97HrlOPwjoNbGvfblHKec/MbbVmDsrrHcXj8fD8Q/cy4rU3GfLsa9SqGoN3ZdycF59iypNDeezl\\nqWiVXPHpSurcLAI+NcZEAB8D+4C885QQ4DOgv4jYndukich6+/ZuILw0w57KsbXlRXBS3tioSBJT\\nUvPvHziYQozXKi+u+UW8P/5JAF56dy41q1Zh45ZtRIdXpnpMNBc1qMuJEyc4mJpGdIRP/zNwDV1J\\nnQMR2YjVtfcDMA6o47W5lv34YK/Hjp9yCJ8WPDq1trwoTsrboWUzFi1bCcCmLduJjYokNLhS/vZ7\\nnnqR5EOpZGZns3TlGtpf3IxfNv7Ovz+bD0BSSiqZWUeIrBzmk/xupCupc2CM6Q1sFZHPjDFJwH+B\\n6fZmwRpQ3xpjeojIIl9kdFttuVvytrqoMU0b1qf3Y0/j5/Fj1KD+zPvme8JCgunerjW9rrqCu0a9\\ngMfj4d5e1xMZHkbva7oyYuJb9H18DNlHj/HkoP74+en6oLi0Zv0cGGNaYdWup2NdPJ+HVcE+Cfsj\\nCMaYhsCXwGXAtryPIBhjPgYmicjSM72G1qyXrHqNrQva2779zMdJzo6ncZzWrKs/JyKrgbgiNre2\\nn5MANLEfy7+AIiI9SzadUmWLrjmVUo6mQ0op5Wg6pJRSjqZDSinlaDqklFKOpkNKKeVoOqSUUo6m\\nQ0op5Wj6YU5VPh2xfgZUTrxPvmvpnPk3LuozxGWXrqSUUo6mQ0op5Wg6pJRSjqZDSinlaDqklFKO\\npkNKKeVoOqSUUo6mQ0op5Wj6Yc4yzC215Xnckvf5r39m3e4kPHgY1qM1zWsUNNd88Ivw5cat+Hs8\\nNK0ezbAebUjOyGLYF8s4euIEx07k8H/dLuXimjE+y+82upICjDFn/SN9jTF1jDFn/fFfY0wLY0zj\\ns93vbBWntvz1CeOZPeMtfoqPZ0vC1pKOdEZuyfvzjv3sOHiY2QOuYcx17XhuUUGTcvqRo7wTv4mZ\\n/a9i1h1Xk5CUyrrdiXy5cRs3NG/AjH49eOiKlrz+3TqfZHcrHVKWJ85hnysp+uecn8nNQIkPKbfU\\nludxS9747Xvp2rg2AA2rhJOWfZT0I0cBCPT3J9Dfj8yjxzmek0P2seOEV6zAgMuacF2z+gDsTcug\\naliwT7K7VZk63TPGDACuBipj9d+9AgzHqpw6ALwLvAMEATnAXVhtxBcbY+aJyM3GmGeBToA/VqvL\\nbGNMD2AskAXsB+4HRgPHjDF/AJnAGOAokALcCrQHhtivcxFWieg84D4g0RhzQERWltTXwi215Xnc\\nkjcpPYsm1aLz70cGVyQpPZvQCkFUCPBncKcW9Jj8KRUD/LmmaT3qRVcGIDE9i/s/XELG0WP8u293\\nn2R3q7K4kmoK3IC10hkLVADmi8izwDPA2yJyBTAFGC0iLwKp9oDqBNQVkcvt/UcaYyphDZtHRKQz\\nMAdrgM0AXhORL4BI4HZ7expwlZ0lDhgAtAMeEJENwAJgWEkOqLPmtlozJ+X1ypJ+5CjTf9rI/EE3\\nsmjITWzYncTv+w8CEBNaiQ8HXsv/dWvN8C+X+SqtK5XFIfWdiBwXkSSsVU0VIG8gtAaW2reXAC1P\\n2bc90NYYsxRYiPX1qQ58BEw1xgwH1ojIvlP2SwTeMsZ8B3QB8v6qXS0imSKSfr7e3PngpNry4nBS\\n3piwYJIysvLvH0jPIibUajBOSEqldkQokcEVCfL3p1WdWDbtPcjPO/aTmnUEgM4X
1OTXfQd9kt2t\\nyuKQ8n5PHiAX6zQM+3ZeuWLeKZ+3o9grLfvXRSKyVURmYg2fJOBLY8yFp+z3DjDEXkl97vX4qfXq\\njuCk2vLicFLeDg2qs+j3HQD8ujeZ2NBKhFQIBKBmeCgJyWlkH7P+2DftTaZuVBhfyx98vsG60L/5\\nQArVKus1qbNRpq5J2doZY/yxTsHCgGSvbT9jDZvZQGfgF/vxvMG2AphgjBmPNcReFJEHjDFPYl2f\\nmm6MicUq/cyh4OsXDvxhjImwj7/+DPm89ysxbqktd1velrViaVotmttnLMDPAyOvjuPTdQmEVQik\\n24V1GNi2CQNmfU2An4dLasXQuk5VGlYJZ9gXy/j69z84euIET13j3L8QnKhM1azbF85vxFoxXQC8\\niHVBu5mIpBtjagBvY12nOgrcJSK7jTGLgTARibMvnHfDWnFNEZEZxpg7gH9hnT6mAHcAHbAuxD8G\\nGKzrYJuBr7Auqg8HbslrLDbGJIlIFWPMncDTwJ0isrio96I16yWrXt16ACS89Ihvg5wl//4jy13N\\nelkcUs1E5FFfZ/mrdEiVLB1S7lEWr0kppcqQMnVNSkRm+DqDUur80pWUUsrRdEgppRxNh5RSytF0\\nSCmlHE2HlFLK0XRIKaUcrUx9BEGpYgusAICnWfmrLXcbXUkppRxNh5RSytF0SCmlHE2HlFLK0XRI\\nKaUcTYeUUsrRdEgppRxNh5RSytF0SJVhm7ck0O36m5g158PTti2LX0nPfgO4rf9AJk9/2wfpTueW\\nvOPe+4Teo16iz6iX2ZCw46Rti39ZT68RL9J39Cu8v/C7/Mc379xDjwefPukxVTzlbkgZY27xdYbS\\n4Jba8jxuybvy1/+xY18ic555hLH/vJ1n3/04f1tOTg5j//0R0x6/j5mj2JM/sAAAHqJJREFUHmTJ\\n6o3sS04hM/sIz874mLbNSry4ukwqV0PKGFMP6OPrHKXBLbXledySN37TZrq2bgFAw5rVSMvIJD3T\\n6uFLOZxBWEgloiqH4efnR9umhuUbhaDAAKY9fh+xkeE+yex25e179yYDccaYp4DmWLVXAVjtwuvt\\nBuPngGPATuAeCqlLF5GnjTHdOL1aPReYBdQFlgG3ikgtY0wTYJK9/TAwQEQOleQbdUtteR635E06\\nlEbT+rULsoSFkph6mNDgSkRVDiUj6wjb9x6gZkw0K3/dTJsmjQjw9yfA398necuCcrWSwqq4+g5r\\n4CwQka7AIOAle/tE4EYRuRLYD/SyHz+pLt1+rLBq9auBiiLSFvgWqGE/93Xgn/brLQLuL6k3eE7c\\n1hjkoLzeSTweD+MG9WPktPd54OU3qRkT7aisblXeVlJ52gMxxph+9v1gY0xVoBEwzxgDEILVWLwb\\nuy4dwN4GBdXqAUADrKEUC/xkb/8vBQ3GccCb9r4VsEpKfcZJteXF4aS8sZHhJB1Ky79/ICWV2IjK\\n+ffjmjRi1uihALw8+wtqxESXesayprytpPIcxTrFy6tTj7Mf2+31WBsRecF+fmF16YVVq3soqG7P\\npeAv2kygi33cdiLyrxJ5V8XkpNry4nBS3g4tLmThirUAbNq2k9jIcEIqVczffu/zU0hOPUxm9hGW\\nrN5I+2amqEOpYipvK6m8ivMVwN+B5fb1oqtF5GVjDMaYJiLyqzHmAaxTw6IUVq2eAPS0t/eg4Ou7\\nDutUcL4xpjeQeKb24vPBLbXlbsvbsnEDmjaoTZ9RL+Pn5+HJO3vx6XfxhAZXonubi+l5ZXvuHjcZ\\nDx7uvbE7kZVD2bT1D8bP+pTdiQcJCPBn4Yq1THz4biJCQ3zyHtymTDUY/xljTAywCvgEqIN1euYP\\n/EtEfjHGdMS6PnUU2AP0x7oONaSQuvRnOL1avT1WjXtlYClwr4jEGmMuAqZjDcksrGtZB8+UVRuM\\nS1a9C6wVzt
b/vOfjJGfHr1WPctdgXK6GVEkzxkRhndZ9YoypCSwWkQvP5Vg6pEqWDin3KG+neyXt\\nMHCrMeYxrOt9Q32cRynX0yF1HonIMeA2X+dQqiwpr/+6p5RyCR1SSilH0yGllHI0HVJKKUfTIaWU\\ncjQdUkopR9OPIKjyKe8jkQGBPo2h/pyupJRSjqZDSinlaDqklFKOpkNKKeVoOqSUUo6mQ0op5Wg6\\npJRSjqZDSinlaDqkyjC31JbncUvecTM+pPfw8fQZ8QIbtmw/advin9fS64lx9B35Iu/PX5L/+OY/\\ndtNjyMiTHlPFo0PqDIwxlxtjYu3b240xocXcb7sxJtQY84Qxpl3JpiycW2rL87gl78pNm9mxN5E5\\nzz3O2EH/4Nl35uZvy8nJYezbc5k2bAgzn3mEJas2FNSsvzOXts3O6SdJl3s6pM5sIFZZwzkRkedF\\nZPl5zFNsbqktz+OWvPEbfqdr3MUANKxV/ZSa9XTCgisRFW7XrDc3LF//m1WzPmwIsVFas34uyuX3\\n7hljAoF3serQs7GG0WSsQtBgrJbicKzaq6bGmFvsXYcYY67F+rpdZe87HasctAIwSkQWeb3ODOBj\\nYOEpr9dfREq0J9wtteV53JI36VAaTRvWKchSOYzEQ2l2zXoYGdnZbN+7n5oxVVi5cTNtmjbWmvW/\\nqLyupO4A9olIB+BNrGH0loh0AYYBj4vI18Ba4E4R+cPeb6OIXA7sALoCfYBsuyD0ZmBSMV/vhhJ6\\nX+fGbY1BDsrr3bbk8XgYd/8ARk6ZyQMvTqVmrNasnw/lciUFtAIWA4jIHGNMODDJGPMo1oooo4j9\\nfrR/34210roUq18PEdljjDli11qd8fXO15s4V06qLS8OJ+WNjSqkZj2y4DQurmljZo15FICX3/+U\\nGrFas/5XldeV1AlOfu8PYVWsdwQGnWE/77p1D1aNuncPWhAFNetnej2fclJteXE4KW+Hi5uwcPlq\\nADZt/eP0mvVnXyc5Nc2qWV+1gfbNL/JJzrKkvK6kfgauBD4yxlwHjAQG29tuwho2UFDLfqbjdAHm\\nGGNqAzkicsgY82ev10JEnjsv76QIbqktd1velqYhTRvUpc+IF/DzeHjy7j58umSZVbN+WUt6duvI\\n3WMm4vF4uPemq6ya9YQdjH/vY3YnJhPg78/C+NVMfPQ+IsK0Zr04ymWDsTEmCHgL60L2MWAMMBXY\\niXVd6VX7sTrAP4AbsarUm4lIujFmArARmGXv1xBrsA0Tke+NMduBZvaxPgYWnfJ6d/zZhXNtMC5Z\\n9RrZDcbzZ/s4ydnxa9Gl3DUYl8sh5QY6pEqWDin3cMx1EqWUKowOKaWUo+mQUko5mg4ppZSj6ZBS\\nSjmaDimllKPpkFJKOZoOKaWUo5XXb4tR5V1WJgC533zq4yBnqUUXXycodbqSUko5mg4ppZSj6ZBS\\nSjmaDimllKPpkFJKOZoOKaWUo+mQUko5mg4ppZSj6ZAqw9xSW57HbXkBPJ1vwq/3UPx6D4WqdU7e\\ndnEna9ttD+K54mYfJXQ/1w8pY0w1Y8y0Qh6fYIwZcMpjofbPHz+r2vTzyRhznV0aWqLcUluex215\\nAah1AZ7IGHLmvELOog/w63JLwbaginhaX0nO3NfImfsanqhqUL2ez6K6meuHlIjsE5F/+jqH07il\\ntjyP2/ICeOo0JneL/XPdD+6HisEQZNdbnTgOOScgqAJ4/CAwELKKqnNUZ+L4792zV0OdgSpAU2AE\\nVnNwE6AvsB/4WERaG2P6AY8Du4AsYKMxpjLwCVCRgnJP7+PXAN7Gans5Adzt1ViMMSYAqyK9FlYN\\n+2gR+Y8xphtWq8w+QIBEERltjHkW6AT4A5NEZLYxpjnwHnAQSDiPX54iuaW2PI/b8gIQHEbu/p0F\\n9zPTITgMjmbDiePkLl+A312j4Pgxcn9fDYc
SfZfVxdyykmqEVU0+DqsG/Sb7dp+8JxhjPMBzWPXn\\nNwAX2Jv6YdWjd8KqTT/VGOAlEemKNXSePGV7FLDIrlK/FXjafnw8Vt3VVUBLO0MnoK5dxX4lMNIY\\nU8k+5mj7NU6c49eg5LitMcipeU+qia2IJ647Oe+MJeetp/FUrwtVavgsmpu5ZUj9IiK5wF5gvYic\\nwFpBhXs9Jxo4LCIHROQY8JP9eBNgmX17aSHHbg+MNsYsxRqAp/ZipwBtjDE/Ya2o8rbXFZE1dpb/\\neh2rrX2shVhf3+rFyFCqnFRbXhyOzZuRhic4rOB+SDhk2BXsUVUhNRmyMyDnBLm7t+I55cK6Kh7H\\nn+7Zjhdx23PKbe+Kc79CHi9sKB8FeonI3iJe+3as1VQn+/dfCnlO3l/tR4G3RWSc90Z7lXemDKXK\\nu7a8WmwsS77/kQnPPePrWEVyat7c7b/j1/4acjcsg9hakJEKx45YG9MOQnRVCAiE48fwVK1NzrZN\\nvg3sUm4ZUsWRDIQbYyKADKADsBzrelFrrOtShf0wnhXA34E3jDFXAtVE5AOv7VWAbSKSY4y5mYIK\\n9n3GmAuB/wE9gCX2sSYYY8bbz3tRRB7wyrCwiAznnVtqy92aF4C928jdv9P6+EFuDjmLP8LTJI7c\\no9mwZT25Py/Gr9cD1kpqzzbY7YB/kXShMjOk7CEyGvgO2I5Vgw7WBetPjTGLsS6cn3pBYzTwb2NM\\nH3vbgFO2fwJ8YYxpC7wD7DLGjAJGAvOAbcBvwAkRWWaMWYI1HD3AFPsYY+3XeBDYSsGgKzHNmlzE\\nzLemFrm9zaWtmPveOyUdo9jcljdP7o9fnvQfVG7SnoLbG5ZZqyz1l2jN+jkyxvQANovIdvtzWt+d\\nsgL7S7RmvWTVq1UbgISRA3wb5Cz5Pzyx3NWsl5mVlA94sFZoh7E/BuHjPEqVSTqkzpGILMS6xqSU\\nKkE+/5cmpZQ6Ex1SSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNP2clCqfKgYD4Olyg4+D\\nqD+jKymllKPpkFJKOZoOKaWUo+mQUko5mg4ppZSj6ZBSSjmaDimllKPpkFJKOZoOqTJs85YEul1/\\nE7PmfHjatmXxK+nZbwC39R/I5Olv+yDd6dySd9y7H9P7yQn0eXICGxJ2nLRt8S/r6DV8PH2feon3\\nFyzNf3zzzj30+NdTJz2miqfcDCljzABjzISz3CfUGLO9kMcn2Me7whhz2o8NNsa8aoypX8QxK9s/\\nH71EZWZlMWb8BNrFtSl0+9gXXuL1CeOZPeMtfoqPZ0uCb5tM3JJ35a//Y8e+ROaMeZSx9/Xj2Rkf\\n5W/Lyclh7DsfMu2Jwcx8aihLVm9kX3IKmdlHePbfH9K2mfFJZrcrN0OqNInIQyKyrYjNrbAqsEpU\\nUGAgb77+SqElmjt37SY8vDLVq1XFz8+Pzh06sHzlzyUd6Yzckjd+o9C1TQsAGtasRlpGJumZWQCk\\nHM4gLCSYqMph+Pn50baZYfmG3wkKDGDaE4OJjQw/06FVEcrb9+7VN8b8F6gNvAKMApqJSLq9ytqI\\nVVP1CVARqwILAGNMP+BxYBeQRUFlVqgxZhZwMfCRiDxjNxgPAQKxaq2O2L9uAyYDlY0xm0Vkekm9\\n0YCAAAICCv/jTUxKJioyIv9+VFQkO3ftLqkoxeKWvEmH0mjaoHZBlrBQEg+lERpciajKoWRkZbN9\\n7wFqxkSzctNm2jRpRIC/PwH+/j7JWxaUtyHVGGslUxlYB5wo5Dn9gI0iMtQYcxvQx24gfg6r4DMF\\nWOX1/CbAhVir0m2Ad7XuncAUEZmZVzwKvIg1GEtsQJ01t9WaOShvrlfrnsfjYdzg/oycOovQ4ErU\\njI32YbKyo7yd7v0oIsdEJBlIAwr7r6gJkNfouNT+PRo4LCIHROQY8JPX81eL
SKaIpHNy7TvA58CT\\nxpgxwAER+f18vZG/Ija2CklJyfn39ycmFnqa5RROyhsbGU7SobT8+wdSUk86jYtr0ohZTz/M1McH\\nERZciRoxUb6IWaaUtyF16l/BiV63A+3fPUCOfduvkMe8Hwc4XtSLichioA3wO/CuMaZUKtb/TK0a\\nNUjPyGDXnj0cP36cJd//SId2l/k6VpGclLdDi4tYuGItAJu2/UFsZDghlSrmb7933GSSUw+TmX2E\\nJas20L7ZhT7JWZaUt9O9dsYYfyAKCAEOAdWNMVuBtsAaQLBO6z4B8oZKMhBujIkAMoAOWFXqZ2SM\\nGQJ8JSLv26eMLYEkSuHrvvHX3xj/8mvs3rOXgIAAFn7zLVd27kStmjXofmUXRg9/nEeeGAnAtVd1\\np37duiUdqUzkbWka0LR+bfo8OQE/Pw9PDryNT5cuJzS4Et3jLqFn1w7c/dzreDwe7v17DyIrh7Jp\\n6x+MnzmP3YnJBAT4s3DFWiY+cg8RoSE+eQ9uU25q1o0xA4CrgArABcALQCXgEazBlAx8D3wGfIq1\\ncvoR6C8i9Y0xA4EHge1YF84X2LeHiEhP+zWSRKSK14XzWsBYIBXrwvmdQAzwNfCSiBT5kQitWS9Z\\n9S6wPg6w9atZPk5ydvxadit3NevlZki5jQ6pkqVDyj3K2zUppZTL6JBSSjmaDimllKPpkFJKOZoO\\nKaWUo+mQUko5mg4ppZSj6ZBSSjlaefu2GFVScnPhSJavUxSf/WNhPLUa+TiI+jO6klJKOZoOKaWU\\no+mQUko5mg4ppZSj6ZBSSjmaDimllKPpkFJKOZoOKaWUo+mHOcuwzVsSGDz0UQb07UO/3reetG1Z\\n/EpenjQFfz8/Lu/YgfvvvcsnGZ97dRLrNm7C4/EwfOgDtGhyUf62b77/kTf+/R5BgUH8rfuV9Ot1\\nMxmZmTz+9HOkHj7MsWPHuP+uAXRqG1eqmcdNfIO1m37H4/Ew4sFBNL+ooJl48Q/LeOPdDwgKDOTa\\nblfQ75Yb+fg/8/l8weL852ySzaz++otSzexmOqS8GGOuBuqLyBtnud8lwE0i8lQR27djl5D+5ZDF\\nVJza8renTKRqbAz97v4nV3XtwgUNG5RWPABWrl7Ljp27mPvWGyRs287wZ8cz9y3rS5+Tk8OYCa/y\\n6btvERFemXuG/h/dOnfkm+9+pH7dOjwy+F72JyZxx5ChLJg7s/Qyr1nP9l17mDvtNRK2/8HwcS8x\\nd9prBZlfmcS8t6dYmR8dQbdO7el53TX0vO6a/P0XfPtdqeUtC/R0z4uILDjbAWXvt7aoAeUrbqgt\\nX/7LKrpd3hGAhvXrkXo4nfSMDABSDqVSOSyUqMgIq7K8dSuWrVxFZEQ4h1JTAUg7fJjI8NKtLo9f\\ntYZundpbmevVIe3w4YLMqamEhRZkbndpS5b9suak/afMmMWgAX1LNbPb6UrKi90o0wyrifh2rMaY\\nz4A3gOUicrExpgawE6gmIonGmHXA/wH3iEhPY8xErEosf+ANEZlhH36IMeZarK/5VSJyuCTfixtq\\ny5OSD9L0woJTpaiIcBKTDxIaEkJUZAQZGVls/2MXNWtUY8WqNcS1asm9/W9n3lcL6N7zdtLSDjPt\\n5edLNXNi8kGamoLv94uKiCAxOcXKHBFBRmYW23fupmb1qqxYvZa4lhfnP3fDb0K12BhiorUw9Gzo\\nkDpdfeBSoKN9/yfgIyDN7t3rgFV91dYYE4/Vo3cEwBgTBfxNRBoaYwKBAV7H3SgizxtjZgNdsYaf\\nMzikMcg7hsfj4flRwxj+7POEhYZSq0Z1IJfP5y+iRtVY3n71RX7/3xaGP/sC82b4rrHeu23J4/Hw\\n/IjHGDHuJUJDQ6hVvdpJ2z/6cj43XdPDFzFdTU/3TtcKaAQssX+FAfWAH4DLsIbUa0A7oD2Qf4FB\\nRA4Cm4
0xnwO3Ae95HfdH+/fdQOmeo5zCKbXlsVWqkJR8MP/+gaQkYqKj8+/HtbqED6ZNYtpL1qCq\\nWb0aq9dvoONl1oXyCxtdwIGkJE6cOFGKmaNJPClzMjFVClZGcS1b8P6Ul5n2whhCQ0OoWb1q/raV\\na9bTsnmTUstaVuiQOl0OVuvwFfav5iLyPbAUq+W4EfAF0BRrYC3x3llErgGeBi4BvvTa5F3H7tPu\\nNKfUlne4rA0Lv10KwKbfNxNbpQqhIcH52+9+6DGSD6aQmZXFkh+X0a7NpdStVYt1m34FYPfefYRU\\nqoS/v3/pZY67lEVLf7Ayy/+IrRJNaHBB5nseGU5yipV56U/xtG/dCoD9SckEV6pIUGBgqWUtK/R0\\n73TfAV2MMcFYTcWvAk8Ay4DHgDQRyTHG5GKtukZiragwxtQDbhCRicBqY8wqH+QH3FFb3qpFM5pe\\naOh9z2A8Hj+eeuwh5v1nPmGhIXS/4nJuvfF6Bj74KB4P3Nu/L1EREdx20/UMf3Y8/Qb9i+PHTzD6\\n8UdKN3PzpjQ1jel930P4eTyMengI8/67iLCQYLp37kivG67lrqHDrJr1f/QmMsJaNCcmJRPtdR1Q\\nFZ82GHvxunC+FRgInAA+E5Fx9vZlwBf2taWxQJyI9DDGXIFVq3471ileHazrVB+LyGTvjyAYYyZg\\nXZ+acaYsrmswzs1l+7rS/xfCc1Xv4tYAbFv1k4+TnB1PTN1y12CsQ8qLMeZeoIGIPOHrLDqkSpYO\\nKffQa1I2Y0w74HHgG19nUUoV0GtSNhFZDjT0dQ6l1Ml0JaWUcjQdUkopR9MhpZRyNB1SSilH0yGl\\nlHI0HVJKKUfTjyCo88PjgQqVfJ2i+I5kA5Czyl0fi/O/2jc/QdWXdCWllHI0HVJKKUfTIaWUcjQd\\nUkopR9MhpZRyNB1SSilH0yGllHI0HVJKKUfTD3OWYW6oWffmlrzPz1vMuh178QDDbu5K87rV87d9\\n8MNqvvzlV/w9HprWqcawm7syddFylst2AHJyc0lKy2D+yHt8E96FdCX1J4wxt5zl8wcYY246w/ae\\nfz3VnytOzfrrE8Yze8Zb/BQfz5aEraURq0huyfvzlj/YkZjC7KH9GNPnGp6btzh/W3r2Ed75diUz\\n/3U7sx7qS8K+ZNZt38N9Pdrx7gN9ePeBPtzStgU927XwSXa30iF1Bnb7S5+z2UdEZojIp0UcLwh4\\n+DxE+1NuqFn35pa88Zt30LWF1WDcsFo0aZnZpGcfASDQ359Af38yjxzl+Ikcso8eIzy4Yv6+x0/k\\nMOfHNfTt1Mon2d1KT/fObDIQZ4wZBXSxHwsE7hCRBGPMrVhD5ziwSkQeNMaMxmo1ngl8CFSwf90P\\n3AU0N8ZMEZHBJRncDTXr3tySNyktgya1q+XfjwwNJiktg9CKFagQGMDgq9vTY8x0KgYGcE3LC6kX\\nW1Ac+vX6zXS4sD4Vg7R772zoSurMXsTq4ZsPPCMiXYB3gMHGmFDgOaCbiHQEGhhjunjt2xXYJSJX\\nAH2BWPt4UtID6qy5rTHISXm9sqRnH2H61/HMH3E3i0b9kw079vL77gP52+fFr+fmy5r7IqWr6ZAq\\nnn3Av4wx3wNDgWigMfA/EUm3n7MUaOm1z3KgnTFmKnCBiCwoxbxn5JSa9eJyUt6Y8FCS0jLy7x9I\\nSyemcggACfuSqR0dQWRoMEEB/rRqWItNO/cBkHnkKPsOHaZmdLhPcruZDqnieQZYKCKXY1WoA+Ry\\ncl16EFZFOwAishe4GJgHDLJPGR3BKTXrxeWkvB0urM+itQLArzv3EVs5lJCKFQCoGRVOwv5kso8e\\nA2DTH/uoGxMJgOxJpEFstE8yu51ekzqzHKyvURUgwRjjAW4E/IHNQCNjTJiIHAY6A2OBbgDGmG5A\\noIjMN8b8CkwBZlBKX3M31Ky7MW/L+jV
pWrsat78yCz+Ph5G9uvPpig2EVaxAt4sbM/DKOAZMmkOA\\nnx+X1K9J64a1AUhMTScqLNgnmd1OG4zPwBgTA6wCkoFKwHbgdWA6cCcQCjyCNcx+FJFhXhfO/wPM\\nwrqongM8BcQD64BNItLrTK/tugZjwC15AerVtyoWE94d7+MkZ8f/6rvKXYOxrqTOQEQSgTqFbKrp\\ndXveKfuM9rrbsZB9m/z1ZEqVH3pNSinlaDqklFKOpkNKKeVoOqSUUo6mQ0op5Wg6pJRSjqZDSinl\\naDqklFKOph/mVOVTkPVznvxadvmTJypf05WUUsrRdEgppRxNh5RSytF0SCmlHE2HlFLK0XRIKaUc\\nTYeUUsrRdEgppRxNh1QZtnlLAt2uv4lZcz48bduy+JX07DeA2/oPZPL0t32Q7nRuyTvu9WncNmgo\\nvQc9zIbf5KRti39YTs97/8Xt9z/CrE++yH/8y0XfcuOdg7n57gdYunxlaUd2tXI7pIwxVxhjPv6T\\n5xSrMt0Yc7UxZtD5zvhXuKW2PI9b8q5cu57tu/Yw941XePbxhxg7cWr+tpycHMa8OoXpLzzDrNdf\\nZMmyFew7kEhKahqTZnzA+5MnMPX5p1n843KfZHercjukiqO4lekiskBE3ijVcH/CLbXledySN37V\\nWrp1agdAw3p1SDucTnqG1cOXkppGWGgoURER+Pn50e7SS1i2ai3Lf1lD+9aXEBocTGyVKMY89qBP\\nsrtVufnePWNMIPAuUBfIxmoiDjXGzMLqx/tIRJ4xxiwFNtq7JVGMynRgJdBMRB41xrwMxAEVgaki\\n8pYxZgawB7gUq9ihr4isLsn365ba8jxuyZt4MIWmjRsVZIkIJ/FgCqEhIURFhJORlcn2nbupWb0q\\nK9asJ+4Sq0knK/sIg54YTVp6OkPu7Eu7S1sW9RLqFOVpJXUHsE9EOgBvApWxmlvuBdoBD3g9d6OI\\nDPG6X6zKdGNMRWC7XbveCatUNE8FEbkKeA3of57f21/jtlozB+X1roTzeDw8P/wRRox/hSEjxlCr\\nelVyc624h9LSeH3sk4wb9jDDx72CVskVX3kaUq2AnwBEZA7wG7BaRDLtqnTvPrNTr2wWqzJdRLKB\\nKGPMMmA+EOO1+Qf7912AT7u2nVRbXhxOyhsbHU3iwZT8+weSDhITHZV/P+6SFrw/aQLTxj9NaEgI\\nNavHEh0VQctmTQgI8KdOzRqEBFfi4KFUX8R3pfI0pE5w+vs9XsRzj3rfKW5lujGmM3Al0NledR0p\\n4rV8WvDopNry4nBS3g5tWrHoux8B2CRbiK0SRWhwQTPxPY89SXLKITKzslm6bAXtL21JxzatWLF6\\nHTk5OaSkppGZlU1keGWf5HejcnNNCvgZa4B8ZIy5DmhR3B3PojK9CrBTRI4ZY24A/O0L7KXOLbXl\\nbsvbqnkTmja+gN6DHsbPz8Ooofczb/7XhIUE0/3yDvS67mruemQEHg/c2/dWIiOsRXOPzh257b6h\\nAIx8cBB+fuVpffDXlJuadXtYvIV14fwY8G/gJhHJ+xhBkohUsS+cDxGRjcWtTAe+ApoBY4CvgSzg\\nM6A9kAb4Ax+LyH/sAdlTRAacKa/WrJeseqYpANt++d7HSc6Op2qDclezXm6GlNvokCpZOqTcQ9ec\\nSilH0yGllHI0HVJKKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH0yGllHI0/TCnUsrRdCWllHI0HVJK\\nKUfTIaWUcjQdUkopR9MhpZRyNB1SSilH+3/FKGgzACmLwQAAAABJRU5ErkJggg==\\n\",\n            \"text/plain\": [\n              \"<Figure size 216x1836 with 2 Axes>\"\n            ]\n          },\n          \"metadata\": {\n 
           \"tags\": []\n          }\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"DRL6XhixwueM\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    }\n  ]\n}\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_tfjs/notebook/EvaluatingClassifier.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"EvaluatingClassifier.ipynb\",\n      \"version\": \"0.3.2\",\n      \"provenance\": [],\n      \"collapsed_sections\": []\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    }\n  },\n  \"cells\": [\n    {\n      \"metadata\": {\n        \"id\": \"DnVolqQO5UMn\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"4e8cb139-8ed2-4b08-e282-57465b9aa39e\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 53\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!pip3 install --quiet \\\"tensorflow>=1.11\\\"\\n\",\n        \"!pip3 install --quiet sentencepiece\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\u001b[K    100% |████████████████████████████████| 3.2MB 10.3MB/s \\n\",\n            \"\\u001b[K    100% |████████████████████████████████| 1.0MB 19.5MB/s \\n\",\n            \"\\u001b[?25h\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"nworUNj67VL5\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"import os\\n\",\n        \"import pandas as pd\\n\",\n        \"import tensorflow as tf\\n\",\n        \"import matplotlib.pyplot as plt\\n\",\n        \"from sklearn import metrics\\n\",\n        \"import sentencepiece\\n\",\n        \"import zipfile\\n\",\n        \"from google.colab import auth\\n\",\n        \"from google.colab import files\\n\",\n        \"from IPython.display import HTML, display\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"koTqnJ5t7vR5\",\n        
\"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Use Kaggle's My Account page to download a kaggle.json file and re-upload it here.\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"zuJpXuS07hrD\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"b60430c3-dd36-44e4-f054-be4befe8998a\",\n        \"colab\": {\n          \"resources\": {\n            \"http://localhost:8080/nbextensions/google.colab/files.js\": {\n              \"data\": \"Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTA
wMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9
jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQuYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICA
gICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJlYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=\",\n              \"ok\": true,\n              \"headers\": [\n                [\n                  \"content-type\",\n                  
\"application/javascript\"\n                ]\n              ],\n              \"status\": 200,\n              \"status_text\": \"\"\n            }\n          },\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 76\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!mkdir -p /root/.kaggle\\n\",\n        \"token_file = \\\"/root/.kaggle/kaggle.json\\\"\\n\",\n        \"uploaded = files.upload()\\n\",\n        \"with open(token_file, \\\"wb\\\") as f:\\n\",\n        \"  f.write(uploaded[\\\"kaggle.json\\\"])\\n\",\n        \"  # Mode must be an octal literal: 0o600 = owner read/write only (decimal 600 is 0o1130).\\n\",\n        \"  os.chmod(token_file, 0o600)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/html\": [\n              \"\\n\",\n              \"     <input type=\\\"file\\\" id=\\\"files-c219c0bd-6da2-4f3a-85d7-47b14b17d7d2\\\" name=\\\"files[]\\\" multiple disabled />\\n\",\n              \"     <output id=\\\"result-c219c0bd-6da2-4f3a-85d7-47b14b17d7d2\\\">\\n\",\n              \"      Upload widget is only available when the cell has been executed in the\\n\",\n              \"      current browser session. 
Please rerun this cell to enable.\\n\",\n              \"      </output>\\n\",\n              \"      <script src=\\\"/nbextensions/google.colab/files.js\\\"></script> \"\n            ],\n            \"text/plain\": [\n              \"<IPython.core.display.HTML object>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Saving kaggle.json to kaggle.json\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"pF9BCpwc76_b\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"import kaggle\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"5IMnm-_f91DV\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Download the test set and extract the labeled portion\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"exMy3FQp8xg8\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"96b64e4c-c76d-4db8-b527-84bf334c66cd\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 71\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"kaggle.api.competition_download_file('jigsaw-toxic-comment-classification-challenge', 'test.csv')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \" 21%|██▏       | 5.00M/23.4M [00:00<00:00, 29.0MB/s]\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Downloading test.csv.zip to /content\\n\"\n          ],\n          \"name\": 
\"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"100%|██████████| 23.4M/23.4M [00:00<00:00, 53.3MB/s]\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"GFjhB3WO9RuC\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"97272c2f-fb02-4d61-a184-74778d81097d\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 71\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"kaggle.api.competition_download_file('jigsaw-toxic-comment-classification-challenge', 'test_labels.csv')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"100%|██████████| 1.46M/1.46M [00:00<00:00, 119MB/s]\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Downloading test_labels.csv.zip to /content\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"4Grw9zJt9Udw\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"test_labels = pd.read_csv('test_labels.csv.zip', index_col='id')\\n\",\n        \"testset = test_labels.loc[test_labels['toxic'] != 
-1].join(\\n\",\n        \"  pd.read_csv('test.csv.zip', index_col='id'))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"neCTJdjJ-hKn\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Load the pre-trained toxicity model from Google Cloud Storage\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"DYVE2PB99XZx\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"auth.authenticate_user()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"D9gQqslA-RKJ\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"069cf772-656f-4696-e961-ec1d67902b9c\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 89\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!mkdir -p tfjs_model\\n\",\n        \"!gcloud storage cp --recursive gs://conversationai-public/public_models/tfjs/v1/* tfjs_model\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Copying gs://conversationai-public/public_models/tfjs/v1/saved_model.pb...\\n\",\n            \"Copying gs://conversationai-public/public_models/tfjs/v1/assets/universal_encoder_8k_spm.model...\\n\",\n            \"Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.data-00000-of-00001...\\n\",\n            \"Copying gs://conversationai-public/public_models/tfjs/v1/variables/variables.index...\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"ZwYHVxgE_BIS\",\n        \"colab_type\": \"code\",\n        \"outputId\": 
\"9bc8b76a-2371-4291-b086-99d20ccf51de\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 289\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"predict_fn = tf.contrib.predictor.from_saved_model(\\n\",\n        \"  'tfjs_model', signature_def_key='predict')\\n\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\",\n            \"WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\\n\",\n            \"For more information, please see:\\n\",\n            \"  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\\n\",\n            \"  * https://github.com/tensorflow/addons\\n\",\n            \"If you depend on functionality not listed there, please file an issue.\\n\",\n            \"\\n\",\n            \"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/predictor/saved_model_predictor.py:153: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.\\n\",\n            \"Instructions for updating:\\n\",\n            \"This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. 
There will be a new function for importing SavedModels in Tensorflow 2.0.\\n\",\n            \"WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\\n\",\n            \"Instructions for updating:\\n\",\n            \"Use standard file APIs to check for files with this prefix.\\n\",\n            \"INFO:tensorflow:Restoring parameters from tfjs_model/variables/variables\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"CrVX18LN__4r\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Load sentence piece model and preprocess test data\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"bMjJEb25_59p\",\n        \"colab_type\": \"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"5IYO0GF2_fEf\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"7c81d410-f695-4bbc-daff-d4a10f23ace9\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 35\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"sp = sentencepiece.SentencePieceProcessor()\\n\",\n        \"sp.Load('tfjs_model/assets/universal_encoder_8k_spm.model')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"True\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 36\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"x2votZMZAnnG\",\n        \"colab_type\": 
\"text\"\n      },\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Score the sentences with toxicity model\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"uU3xQGiKA993\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"def progress(value, max=100):\\n\",\n        \"    return HTML(\\\"\\\"\\\"\\n\",\n        \"        <progress\\n\",\n        \"            value='{value}'\\n\",\n        \"            max='{max}',\\n\",\n        \"            style='width: 100%'\\n\",\n        \"        >\\n\",\n        \"            {value}\\n\",\n        \"        </progress>\\n\",\n        \"    \\\"\\\"\\\".format(value=value, max=max))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"Xs3Glf93Bp6O\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"071890ba-b1b0-4e17-fd5a-6af93eaffb20\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"tox_scores = []\\n\",\n        \"nrows = testset.shape[0]\\n\",\n        \"out = display(progress(0, nrows), display_id=True)\\n\",\n        \"for offset in range(0, nrows):\\n\",\n        \"  out.update(progress(offset, nrows))\\n\",\n        \"  values = sp.EncodeAsIds(testset['comment_text'][offset])\\n\",\n        \"  tox_scores.append(predict_fn({\\n\",\n        \"      'values': values,\\n\",\n        \"      'indices': [(0, i) for i in range(len(values))],\\n\",\n        \"      'dense_shape': [1, len(values)]})['toxicity/probabilities'][0,1])\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/html\": [\n              \"\\n\",\n              \"        <progress\\n\",\n              \"     
       value='63977'\\n\",\n              \"            max='63978',\\n\",\n              \"            style='width: 100%'\\n\",\n              \"        >\\n\",\n              \"            63977\\n\",\n              \"        </progress>\\n\",\n              \"    \"\n            ],\n            \"text/plain\": [\n              \"<IPython.core.display.HTML object>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"nXLm_GNWbuqP\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"10537a68-d4be-4a10-9ab1-284489974cc7\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 71\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!gcloud storage cp gs://conversationai-public/public_models/tfjs/perspectiveapi.csv .\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Copying gs://conversationai-public/public_models/tfjs/perspectiveapi.csv...\\n\",\n            \"/ [0 files][    0.0 B/  2.0 MiB]                                                \\r/ [1 files][  2.0 MiB/  2.0 MiB]                                                \\r\\n\",\n            \"Operation completed over 1 objects/2.0 MiB.                                      
\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"9-x3fQEjb2-X\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"perspective_api=pd.read_csv('perspectiveapi.csv')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"dGdFOCzbzDTJ\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"top_kernel = kaggle.api.kernels_output(kernel='tunguz/superblend', path='.')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"ydkfpaSV4GCK\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"top_kernel_scores = testset = test_labels.loc[test_labels['toxic'] != -1].join(\\n\",\n        \"  pd.read_csv('superblend.csv', index_col='id'), rsuffix='_predicted')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"nkeZUDG_31c4\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"30e2889b-15b0-4aea-bf2b-86794434776b\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 376\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"plt.figure()\\n\",\n        \"\\n\",\n        \"fpr, tpr, _ = metrics.roc_curve(testset['toxic'], tox_scores)\\n\",\n        \"plt.plot(fpr, tpr, label='Tensorflow JS model')\\n\",\n        \"\\n\",\n        \"fpr, tpr, _ = metrics.roc_curve(testset['toxic'],\\n\",\n        \"                                perspective_api['PerspectiveAPI'].values)\\n\",\n        \"plt.plot(fpr, tpr, label='Perspective API')\\n\",\n        \"\\n\",\n        \"\\n\",\n    
    \"fpr, tpr, _ = metrics.roc_curve(top_kernel_scores['toxic'],\\n\",\n        \"                                top_kernel_scores['toxic_predicted'])\\n\",\n        \"plt.plot(fpr, tpr, label='Top scoring Kaggle kernel')\\n\",\n        \"\\n\",\n        \"plt.xlabel('False positive rate')\\n\",\n        \"plt.ylabel('True positive rate')\\n\",\n        \"plt.legend(loc='lower right')\\n\",\n        \"plt.ylim(0.75, 1.0)\\n\",\n        \"plt.xlim(0.0, 0.25)\\n\",\n        \"plt.title('Performance on Kaggle Toxic Comments Challenge Test Set')\\n\",\n        \"plt.show()\\n\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAf8AAAFnCAYAAACoxECQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xd4FNX6wPHv7qZX0oHQO4TeqyAE\\nRIqoiAZFuKKCiCAoKqIgVUQRUAT7Va7KFVCwwAVFqUr7gVgoUlMJ6T2bZNv5/REZWEMIYDb1/TwP\\nzzNzzpR3J8O+O2dmztEppRRCCCGEqDb05R2AEEIIIcqWJH8hhBCimpHkL4QQQlQzkvyFEEKIakaS\\nvxBCCFHNSPIXQgghqhmn8g5A3LjmzZtTr149DAYDSim8vLyYMWMGPXr0uKHtZGdnM2bMGPLy8li3\\nbh1+fn4Oirjy6d+/P6+++iqdO3cG4NChQzz99NN8/PHHNG7cuFT3FRcXx6BBgzhx4sR1r3P33Xdj\\nNBqxWCzExsbSsGFDABo3bsyqVatuOIY1a9aQlZXFlClTrnudrKwsli9fzs8//4xOp8PJyYlRo0Yx\\nbtw4dDrdDcdQ1o4ePYqnpyfNmjW7ofU2btzImjVrKCgowGQy0alTJ5599lmCgoLYsGED27Zt48MP\\nP7zu7VksFsLCwvjpp5/YtWvXDa9fmv7880+mTZsGQE5ODrm5uYSEhAAwcuRIHn300Rve5pkzZ8jO\\nzqZjx45F6lJSUli0aJF27js7OzN+/Hjuvvvua27TaDTy448/Mnz48BuORxSS5F9JffLJJ9SsWROA\\nI0eOMGnSJLZt24a/v/91b+PUqVNkZGSwe/duR4VZJZw5c4ann36alStXlnriv1kbN24ELv9w2LZt\\n2z/a3rhx425oeavVysMPP0zLli3ZvHkzLi4uXLx4kcmTJ5OVlcXUqVP/UTxl4YsvvqBnz543lPw/\\n+eQTPv30U95++20aNWqEyWRi1apVjBkzhm+//daB0ZaNFi1aaOfSzfyQuZpt27bh4eFx1eQ/Z84c\\nmjdvzrJly9DpdJw7d46IiAhatGhBq1atit3mH3/8webNmyX5/wOS/KuATp06Ua9ePY4ePcqAAQP4\\n4YcfeOONNzAajdSvX5+lS5fi7+/PypUrSUxM5M8//6Rfv3588cUXpKamMnj
wYNauXcuZM2d45ZVX\\nyMvLw9vbmzlz5tCmTRs2btzIjh07yM7OJiwsjL59+7Js2TLatm3Ljh078PX15aWXXmLp0qWcP3+e\\n++67T/vyX7VqFd988w1Wq5XGjRvz2muv4ePjw8qVK0lPT9fi8fPzY/Xq1QQHBxMbG8vMmTNJSkrC\\nx8eH+fPnExYWRkJCAnPnziUyMhKAWbNm0bdv3yLHIz4+ntmzZxMXF4ezszOPPPIId955J3FxcURE\\nRDBhwgQ2bNhARkYGzz//PEOGDCn22CYmJjJp0iQWLFhA+/bttfIff/yRFStWYDKZ8PT0ZNGiRbRs\\n2RKbzcaiRYvYtm0b9erV49Zbb2Xv3r188sknxMXF8cQTT5CVlUXv3r1JTEzktttuo2vXrtp2lVKs\\nWrWKb7/9FpPJxIABA3j++ecxGAw3dE7s37+fJUuWkJ+fj4+PD3PnzqVVq1Y8+uij9OnTh7Fjx5KZ\\nmcnQoUP597//zZYtW0hPT2f+/PlER0czc+ZMUlJS8PX1ZcGCBbRs2dJu+zt37iQtLY2XXnpJi61W\\nrVosW7aMnJwcoPCHyezZs4mPj8fZ2ZkJEyZwxx13EB0dzdixY7n//vu1HzFLlixh1apV2rm5YMGC\\n614O4Pvvv2flypUYjUYaNmzI0qVLqVGjBsuXLyc3N5f4+HhOnTpFQEAAq1evZtu2bWzevJk9e/aQ\\nkpLCoEGDeO6550hJScFkMjF8+HCefPJJu89ssVhYtWoVy5cvp1GjRgC4uLgwbdo02rRpY7fs3Llz\\n+fnnn3F1dWXFihU0adKEpKQkZs6cSXx8PGazmXHjxjF27Nhi/4aZmZnMnz+fY8eOYbFYmDJlCnfe\\neafWUvDqq6/y0UcfkZKSwsSJE3nwwQex2WzMnz+f7du3U79+ffr27cv+/fv5+OOPi93ejfr000/5\\n9NNPMZlMdO7cmYULF+Li4sK+fftYsmQJZrMZgKeeegqLxcJHH32Eq6srGRkZPP3003bbOn36NPfe\\ne6/WUtS4cWO2bNlCYGAgAAcPHmTJkiVkZ2cTEBDAsmXLtG0bjUbGjRvHmjVrbvgzCECJSqdZs2bq\\n4sWLdmUjRoxQe/bsUTExMapDhw7q1KlTSiml3nnnHTVlyhSllFJvvvmm6t27t0pNTVVKKXXgwAEV\\nHh6ulFIqJydHdevWTR0+fFgppdS2bdvUoEGDlNVqVV9++aVq3769ioyM1NYLCwtTBw4cUDabTY0c\\nOVLdfffdymg0qlOnTqlWrVqp/Px89ccff6gePXqo7OxsZbVa1b/+9S+1atUqLZYePXqouLg4ZbPZ\\n1IQJE9Tq1auVUkqNGzdOffbZZ0oppbZv366GDBmilFJq7Nixavny5UoppaKiolTXrl1VWlpakeMz\\nfvx49c477yillIqLi1OdOnVSsbGxKjY2VrVq1Up98sknSiml/ve//6mBAwde9RjfeuutateuXeqO\\nO+5Q69evt6szm82qc+fO6ujRo0oppVauXKnGjRunlFJqx44dKjw8XOXk5Kj09HQ1ePBgNWbMGKWU\\nUlOmTFGvvvqq9rlat26tvvzySxUbG6tatmyplFJq06ZNaujQoSorK0uZzWY1YcIELd6ruXLdS7Kz\\ns1XXrl3Vr7/+qpRSasuWLWrw4MHKZrOp2NhYdcstt6i0tDQ1f/58tWzZMqWUUsuWLVOzZ89WSik1\\nZswYtW7dOqWUUlu3blXDhw8vst+XX35ZzZkzp9i4lCr8O77//vtKKaViYmJUx44dVXx8vIqKilKt\\nWrVS33zzjVJKqUmTJqn+/furtLQ0lZqaqsLCwlRcXNx1LxcZGak6dOigzp49q5RS6q233lLTpk3T\\nPlevXr1UfHy8stlsavz48erdd99VSikVERGhNm/erJRSatGiRdr5l5ubq5588kmVnJxs93lOnjyp\\n2rZte
83PvH79etWhQwd14sQJpZRSs2fP1o7rnDlz1Lx585RSSkVGRqqwsDCVkJCgzGazatasmUpK\\nSlLr169X48ePV0op9eyzz6rnn39eWa1WlZycrHr37q3Onj2rLX/pb/fLL7+odu3aKavVqrZv364G\\nDRqkcnNzVVpamho0aJB2bha3vWt9lkuxXPLTTz+p3r17q5SUFGWz2dRzzz2nxTFs2DDtnDtz5ox6\\n9tlnlVJKTZs2TX3wwQdX3cf8+fNVz5491XvvvadOnjypbDabVpeenq46deqkDh06pJRS6osvvlAR\\nERFKKaXWrl2rJkyYcM2/hbg2eeCvCti9ezcpKSl07NiRPXv20LVrV60pMyIigh07dmC1WgFo167d\\nVW8N/P7779SsWZNOnToBcNttt5Gens6FCxcAaNCgAQ0aNNCW9/HxoVu3buh0Opo2bUrXrl1xd3en\\nadOmWK1W0tLSaN26Nbt27cLLywu9Xk+HDh2IjY3VttG5c2dCQ0PR6XS0bNmSixcvUlBQwMGDBxk2\\nbBgAAwYMYP369RiNRg4ePMi//vUvAOrXr0+nTp2K3LIwm83s27eP+++/H4DQ0FC6devGgQMHgMKr\\nt0v3E8PCwoiPjy/2uM6bN4+MjAxSU1Ptyp2cnNi3b5/WEtC5c2ftcx0+fJh+/frh6elJjRo1GDp0\\nqLbe4cOHtc8VHh5OcHBwkX3u3LmTkSNH4u3trd1D//7774uN8WqOHj1KnTp1aNeuHQC33347SUlJ\\nXLx4kTp16jBu3DieeeYZ9u3bx+OPP263rtFo5P/+7/+0uAcNGsTnn39eZB+ZmZna1dnVFBQUcODA\\nAUaPHg1A3bp16dKlCwcPHgQKbxvcdtttADRr1oy2bdvi5+eHv78/AQEBJCUlXfdye/bsoWfPntot\\nmdGjR/PDDz+g/uq5vGvXrtSqVQudTkerVq24ePFikXgDAgLYs2cPR44c0a7W//75MjMzCQgIuNah\\n1+K81FLSqlUrEhISgMLWgFmzZgGF/5/8/Py0/19Xs3PnTsaOHYterycwMJCBAweyfft2rX7EiBFA\\n4Xmcl5dHRkYGhw8fpn///nh4eODn52fXqlXS9q7Hzp07GT58OAEBAeh0OiIiIrRt+Pv78+WXXxIZ\\nGUmTJk1YsmRJidubNWsWkydPZteuXYwcOZI+ffrw3nvvoZTi0KFDNGjQgC5dugBw1113ceLEiSL/\\nH8XNkWb/SurBBx/UHvgLDQ3l/fffx9PTk+zsbA4fPszgwYO1Zb28vMjIyADA19f3qttLS0vDx8fH\\nrszb21v7j/b39Tw9PbVpvV6Ph4cHADqdDr1ej9VqJS8vj8WLF2tf+JmZmfTr189u+5cYDAasVisZ\\nGRnYbDatTqfT4enpSWJiIkopIiIitHWMRiPdu3e3iysjIwOllN22fXx8SEtL0/ZzKVa9Xo/NZrvq\\n8QB4+OGH6devH/fccw9t2rShV69eWt0nn3zCpk2bMJlMmEwmrdkyKytLe0AKsJvOysqyO45X1l2S\\nnZ3Nhx9+yLp164DC5Hcjz3FA4d/yyv3odDrtb1m7dm3uueceli1bxmOPPYarq6vduhkZGej1eu3v\\ne+Xf9kp+fn4kJiYWG0N6ejpOTk5254mPj492Pjk7O+Pi4gLY/00uzV/6sXo9y2VlZXHgwAG7c97T\\n05PMzEyg8Py/5NK5+Xfjx48H4KWXXiIlJYUxY8bwxBNPFPnMSUlJ2Gw29Prir5uu/MxXfpZff/2V\\n5cuXk5CQgF6vJy0t7ZrnX3Z2NlOmTNFuqxQUFNj9mLx0jl+KxWq1kpmZSf369bVlrjzHStre9cjK\\nymLv3r3s2LEDKLxNdenzvfbaa6xevZoHH3wQLy8vnnnmGQYMGHDN7RkMBu6//37uv/9+cnNz2bFj\\nBwsXLiQoKAir1cq5c+eK/F0l+ZcOSf
6V1JUP/F0pODiYnj178uabb97Q9gICArQfCFD4n/rSlc75\\n8+dvKsY1a9YQFRXFxo0b8fT0ZPny5ddMGFD4BavT6UhPT8ff3x+lFDExMdSuXRuDwcCXX35p9+V6\\ntfX1ej2ZmZlaAszIyLiuK7a/a968OaGhoSxevJgZM2bw5ZdfUrt2bX755Rfef/99NmzYQJ06dfj5\\n55+ZPXs2UJhojEajto3k5GRt2tPTs9i6S4KDg+nfvz9jxoy54XgvCQwMtPtb2mw2uyv1lStXMnLk\\nSDZs2EBERITdFa6fn5+WUH18fLTjf2VCAejWrRsvvvgiBQUFdj8gIiMj2bNnD6NHj8ZisZCTk6Ml\\n35v9O5QkJCSEPn36sHz58pvehrOzMxMnTmTixImcP3+eRx55hM6dO9v9uGzcuDG+vr7s3LmzSFJb\\nuXIlDzzwwDX3MWPGDCZOnMioUaPQ6XT07NnzmssHBQXxzjvvFHnI1GKxFLvOtc6/4rZ3I4KDg7n3\\n3nuZPn36Vevmzp3LSy+9xO7du5k+fbr2w/9qcnJyOHLkiPbcjqenJ8OHD+eXX37h9OnT9OjRgxYt\\nWvDf//63yLpHjhy56c8gCkmzfxXTu3dvDh8+rDVD//777yxcuLDE9dq2bUtKSgpHjx4FYMuWLdSs\\nWZM6dercdCypqak0atQIT09PLly4wO7du+2+mK7GxcWFXr16sWnTJgD27t3LhAkTcHZ2pm/fvloT\\ndF5eHs8//3yRJlwnJyd69+6tXTnHxMRw+PDhEr9or6Vfv36MHDmSKVOmYDKZSEtLIyAggNq1a5OX\\nl8emTZswGo0opWjTpg27du0iPz+frKwstm7dqm2nbdu22vzOnTu1pu0rDRgwgK+//pq8vDwAPv/8\\nc+1YXK927doRHx/P77//DsA333xDvXr1qFmzJseOHWPPnj3MmjWLBx54oMi54e7uTo8ePbQH7Hbt\\n2sWkSZOK7OOWW26hfv36zJw5k9zcXKDwQcvp06djs9m0v+Olv0NUVBRHjx694ddRr0efPn04dOgQ\\ncXFxQOFtj8WLF5e4npOTE1lZWUBh8/P+/fsBqFevHoGBgUVeVzQYDEybNo0FCxZw/PhxoPA209Kl\\nS9m5c6dda9PVpKWlERYWhk6n44svvqCgoOCa/x8GDBigne9ms5mFCxdy8uTJa+6jbdu27Ny5k4KC\\nAjIyMvjuu+/+0fauFtO2bdu0H5dbt27l448/pqCggAcffJDU1FR0Oh2tW7fGYDBor4BeOs5XUkrx\\nzDPPsHnzZq0sMTGR/fv306VLFzp16kRsbKz2GmBkZCQzZ84ECv922dnZ2q0dcePkyr+KCQ4OZsGC\\nBUyePBmz2Yynp6d2n/FaPDw8WLFiBQsWLMBoNOLv76+9fnOzIiIimDp1KrfddhvNmzdn5syZTJky\\nhY8//via6y1atIgZM2awdu1afH19Wbp0KYB2VbFhwwYA7rjjDmrVqlVk/Xnz5vHiiy+yceNGnJ2d\\nWbhwIbVq1dKSw82YPn06v/76K/Pnz2f27NmsXbuW8PBwQkJCmDVrFr/99htTp05l2bJl7Nq1i8GD\\nB1O/fn1uv/12Lak888wzPP3002zZsoVbbrmF9u3bFzm+4eHhnDlzhrvuugsoTESLFi26oVi9vLxY\\nsWIFc+fOxWg0EhAQwOuvv47NZmPOnDnMnDkTV1dX/vWvfzF8+PAiz00sXryYp59+mk8//RRfX19e\\ne+21IvvQ6/W8++67LFu2jBEjRuDs7Iy7uztjx47Vnqm4dKw2bNiAs7MzixcvJiQkhOjo6Bv6PCWp\\nWbMm8+bNY9KkSVgsFry8vHjhhRdKXG/gwIEsWbKEmJgYRo8ezdy5c8nNzUUpRXh4ON26dSuyzqhR\\no3B1deX555+noKAAnU5H9+7d+fjjj3F2dr7m/p588kkee+wx/Pz8GD1
6NKNGjWLWrFl89tlnV11+\\n+vTpzJs3T3vmoW/fvjRr1uyaCe+2226zO/8GDx6sXSUXt70b0aFDB+0NDKUUgYGBLFy4EFdXV0aM\\nGMGYMWPQ6XQYDAbmz5+Ps7Mz/fv3Z9asWVy4cEH7vwyFty0++ugjli1bprVUurq68vDDD9O/f38A\\nli9fzosvvkheXh4uLi489dRTQOFzHG+88Qb9+vWTV5Vvkk7JTychSpVSSkvqn332Gfv27dM63rmy\\nbuTIkUyaNInw8PByi1VUPVeeY2vWrOHIkSM3fBtQVH3S7C9EKTp58iQDBgwgMzMTi8XC999/r70V\\nsGTJEubNmwfAuXPnOH/+PK1bty7PcEUV88cffxAeHk52djZms5kffviBDh06lHdYogJyaPI/ffo0\\n4eHhfPrpp0Xq9u3bxz333MN9991n1x3pyy+/zH333UdERIR2z1KIyqJly5bceeed3H333QwZMoSQ\\nkBDt4b2HHnqIqKgoBg4cyOOPP86cOXOu+tCmEDerTZs2DBs2jDvvvJMhQ4YQGhqqvW4pxJUc1uxv\\nNBqZOHEiDRo0oHnz5kWeXh4yZAgffvih9uU4f/580tLS+PDDD3n33Xc5d+4cs2bN0h4YEkIIIUTp\\ncNiVv4uLC++///5VOzKJjY3F19eXWrVqodfrtS4o9+/fr93/bNy4MZmZmVpXoUIIIYQoHQ5L/k5O\\nTri5uV21Ljk52a7jEn9/f5KTk0lJSbEbWe5SuRBCCCFKT4V+1e967khc+WSrEEIIURFY8/LIOvkn\\n6opeHI3RMSiLBa7IWSar6Zo9PV5N0wduvhOwS8ol+QcHB5OSkqLNJyYmEhwcjLOzs115UlISQUFB\\n19yWTqcjOTnbYbEKCArylmPsYHKMy4YcZ8erqsfYajRiuhiPNdv+sxlP/Yn+r1ZuZTJhPHEMm8mM\\nOTHBYbFU2uRfp04dcnJyiIuLo2bNmuzcuZOlS5eSnp7OypUriYiI4Pjx4wQHB9v1yy2EEEKUBktW\\nFqqgAACb2UxG1Gnyz54h49RxrJ7umG1mTFYTBp2BwJj0m97Pz+08UX9rnE7yt+8QqrZnCJ7ORcfQ\\nKE6vkhcpkcOS/7Fjx1iyZAkXLlzAycmJ7777jv79+1OnTh0GDhzI3LlztbGdhwwZQsOGDWnYsCFh\\nYWFERESg0+l46aWXHBWeEEKIcpJtyiHPkndD6yQZU8jPzkRvvGK9fBOGlDTUlQMtKcXF3ERcDC4k\\nG1NBB275VpodiMbJbKPARY+rqfhmdncAMq5al+FlINXXwIVgF1wMzrgaXNHbFHmezhg9CxO60kGm\\nnxtWg44MWy4hHsG0CwrDy8WLQLfCZ9oUEOQegLuTGy4GF/S6su9yp0r08FcVm5gqkqrajFeRyDEu\\nG3KcHS8oyJuExAyS8lKIyorl05PrcTG4cOnit8BqKpxQCr8sq1autymC0yxY9faXya4mG51PGvE2\\n3th98WvJ8HfDNd9Ctq8rud6Fo0Y65eSR17AWuDjj3LUTjWs0wkXvTLBHIKBD5+SE7q8fGQadHoPe\\nUGrx3KigoGuPI3E9KvQDf0IIISqG9PwMIrNi+D35BB7OV3+TK99SwMGEIxisiuZR+dxyJIdRNQrT\\njKvBRVvOKyMf14KiQytfD3OIP+bQy6+QG3KMFDStZ/cQnU3Z8HT2wM3JDTeDK+5O7iizGZ/evXFy\\n95SHxJHkL4QQ1ZLFZiHHnEuOKZdscw6n0s4SmRXN2YxIdOjsEqRN2XAx22gRmY+LWeGRbqFmqhmf\\nXBsK7O5pdwX0V7Qn104xg94AuvzLhVc83e7b91YtcVtzcnBv1Ai9m7tdrMpixiOsDS4hIaV5CKo1\\nSf5CCFEF2JQNmypMqhabhZNpZ0jNTyPXbCQ3L7sw0ZtzyDHlkmPORRmN+OQWLu+XZcHZoqidbaVJ\\nn
o2G8SbyalxOwL4pxQ89rAMMDepr887OBnQ2AwWxsfgPvh2fXr1x9g9wzIcWN02SvxBCVGLRWbF8\\nfOK/JBkLX5N2siiC0s0EZlhoezqPepk317zukmJE7/7XDwA3N2z5hVfuwQ/+C52TE24NGuAcGITe\\n1dVuPXmuonKQ5C+EEJWEUgpbbi55sdGcy4ji7K5vcS9Q3HehgAxvA/5ZxSd61+bN0RuctAfsUGDN\\nM+LRomXhrNWKe6PGoNfhVq8BToGBcm+8CpPkL4QQFYjJauJ8ZnRhD6e/n0R/6Fd0ZyLJ9XHFM6tA\\nW84daHPFev5ZVgzePlizszD4+ODTszc6JwOebTvg3qhRmX8OUbFJ8hdCiHJiNBvZd/H/+DPtDO4W\\nHamnjxGaUEC9BBMhaRa7ZT2zCsh21+OdZyPRz4nYWi4EeAXTsmYrat8yCIOvr1ypi+smyV8IIcqI\\nxWbheOopsk3ZfHHsC7r9kUvtZDODUi3FrmNsVJvUfm0x1KtLm4CWuBhcaOTkRh+9fH2LmydnjxBC\\nOFjOudN8t2EZzhZFi6gCnPXw+NX6rNHp8OnbD9eatfHu1g2Duwc6J/maFqVPziohhHCAzMQ4Ivd9\\nj8eWPYD9/XknG5j8vHE12Qi47Xa8u/fAycdXEr0oM3KmCSHETVBKkX/+HNmH/w+sVtDrSY89i+7U\\neW2ZK4dqyXXTE/zQeAKC6uJat57cnxc3rMBkJSUzT7r3FUKIsmBOS8WSlkbW/n2g15O588erLvf3\\ndB5dxx2/gFAsw/rTtk57PG5g5DZRtR07n0p6dkGx9cmZeWTkmEhINXL2QqZd3bevj/jH+5fkL4QQ\\nVzAlJGDLz6PgwgWyD+zDePJEscta9RAX7MKpBq6k/NWHfYvQNtzabjghHkE0K6ugRYWXmpnP8ag0\\nbErxv/3RpGTml7zS34T4e2Czlc4AR5L8hRDVmrJaKYiNIfXbr8n97ddil0vzMRBT04VT9d3A1ZkM\\nbyfydRbcndy4v8U9dAhqI035VZDFaiMpPY+YxGxy8swUmK2cu5CFl4dzkWWPnEoiyNe9SHlMUs5V\\nt+3j6cKofo2L3bdS0KCWN17uztTwci12uZshyV8IUa2YU5K5+N47OIeEkL1/31WXOVfHhUwvAyg4\\nW8+NlBAP+tftQ9+aHRnhVsNuhDpRdVisNqxWhdlqY8naX7iQnHvD24hJysHd1X64XzcXA/kmK0E1\\n3OjcIpjQQE9CA72oX/Of37u/WZL8hRBVnlKKvD9PcmT2f8i/mABA/vlzWr0l2I+Luhx+6OZDlqde\\nG2WuTWBLHms6gkB3/3KJW1ybTSmSM/JIzcznQnIuFpsNHTqUUvx2NgU/n6JDDyekGUnNzMfT3f7K\\nPTGt+MGLmoT64uXuTICvG83q1sBmU9QL8cKgL9rSE+jrjv4q5RWNJH8hRJVlNRqJX7mCvDOni9St\\nHexHvquePFc9Ficd4AfAnO7PEOIRVMaRiuIopcjNtxR2dwxsPxxLbGIONgV/nE8tYe3MYmty8sz4\\nel5uwfHxdMGYb8HP24VaAZ4kZ+QxaURr6gR7lcbHqHAk+Qshqgyb2Yzx5HHyIyMxnjxB/tkzdvUX\\ngpz5vocPWV72zbIt/Zvh4eTOmJajcJEm/XJnsdpYt+MsPx6Ju67lm9bxJayBPwq0pnSlFLUDPXFx\\nMhRZ3tVZj4db0Xv21YkkfyFEpWc8eYLET9dgTky8av1/b/Mjyd8JdDq8Xbxo6VWb2xuE09C3Hnqd\\nvoyjrd6M+WaM+RaOR6WRkGbUkvOJ6DSS0vPQ63Rk5pqKrNepWVDhu5QKQoM86d+pDga9Ds9qnsRv\\nliR/IUSlZDObyD93jrilS4rUBYy4i1wnGx/k7SW1xuWvuQ9GvEqBDDVfZo6eSWb9jrME1ih8Av54\\nZFqJ6/h4uhDi505qVj7jBregZ+
ua8haFA0jyF0JUCkopzEmJGE+cIHXzN1gzM+zqbc4GPr+rNslO\\nBbgZjpBvzQfXwq+4iOZ30Se0Bz5u3iRnS/a/UcZ8C1/sOkt0YjYuTgb+nouz88xcSM7F3dWAXqfD\\npsBktmK1Fd6nT0zPs1u+TpAngb7u+Hm70qVFsLa9Gt6uhPhJR0hlQZK/EKJCyzq4n4T33y22/kxd\\nV4608iAxwBko7DEt35pPqFctEnKTeLXPXNycSvcd6aomLSufvAILyZn5/BmdzsGTiYTUcOd0XCYu\\nTnpMluvrWCavwErD2j5Y/lrxzRexAAAgAElEQVTealPk5plZ+Gg3rXnfYNChlyv5cifJXwhRISml\\nOPvcU6i0dLvyY43dcDYrDvetS0iN2rQJbMUtNitBHgF4O3tR26smTjLc7TUdO5/KWxv/IMTfg9hi\\nOqDJzCm8726y2GhQ05uohGweGdaSsIYBeF+lgxsAvU5HUJA3ycnSulLRyf8QIUSFoWw20iJPEfPu\\nW3im2Xew8sb9wdRw9WVej+dw0jsxvJxirKiUUhw5lUxOnpmT0el4uF3+ek9MM5KQZiQjx/5Butik\\nnEvP0NGyvh81Azwwma20qOcn99qrOEn+QohyZzObOPfkEyhTYXLyvKJuRxcv2g4fy9OewdT1Cq32\\nV/X7jydw+M8kPN2cyckzcyau8NmH3HzLdW+jXrAXOr2OZ0d3wN21eh/P6kr+6kKIcmG2momMPUH8\\n2o+pfd6+af94Y3eatL+FFoPu4TFD9X6Vy2K1kZVrYvev8Xy7L6rY5XS6wmb3to0D6NIimBpervh6\\nXe6zwN3VqdT7hxeVlyR/IUSZi/5lLwWrPwSg9hXlH94ZwNgej3JXQIvyCawCiU7IZt7H/3fVun4d\\nQhnUpS56vQ5ngx4/b0nq4sZI8hdClIlsUw7RZ3/FZdkHduX5IX5YO7emw10PU/SN/erldGwGB44n\\nsOvXeLtyX08X/H3c6B4WwsDOdcspOlGVSPIXQjhMQm4ia058Tnx6HA9/lYKbSWl1+S46mixfiYdr\\n1ew7/XoopTh4IpFth2Kw2RRxfxtFrkW9Gky+u430YidKnSR/IYRD5Cde5Pgbc7gryWxXbvL1xO3h\\nB2nbqns5RVb2lFLs/i2e5Iw84pNzMVkK7+NfSLn6kLHPjO5A83o15H144TCS/IUQpSol+jQpry9H\\nb8zjygZqJ39/aj02GfdGjcstNkdSSnEsMo2jZ1LsxnM3mWz8+Mu1B6jp3iqEe/o1xtPNGVeXogPR\\nCFHaJPkLIW6aOSUZa04u6T9+T0bKBQxnogGwGypn6iM0bdOrSr8zbjJbeez13SUu1699bbqH1cTD\\nzYmgv8Z9d3aSgYVE2ZPkL4S4IeaUZOJWvI45IcGu/Mrr1QwvA3nj7yasaQ8C3f3LNsAypJRi++E4\\nPv/x8tDBLev7cWefhnZN9gaDjnoh3tKMLyoMSf5CiOtiNRqJXfIypgv2Tdhn67lhsNg438KfOs06\\nMLTTqCo/TG5aVj7JGXksWXvUrvy5+zvQvJ5fOUUlxPWT5C+EuCpls5G5exdpWzdjSbMfitXk6cq2\\nLh5E1nYmwM2fQfX78UitzjhX4d739h9PYO9v8fwZk1Gkrn/HUB4Y2KxK39oQVUvV/Z8qhLhptoIC\\nzk6eWKTc6OfB/qYGjjVxp6ZnCOPq30qn4HYY9FX3ITWrzcYn351iz28X7cr9fVxpVrcGAzrWoXGo\\nbzlFJ8TNkeQvhEApRd7pU6Rs/IKC2Bitj30A1bE1W1sbOGMpTH71vEN5tMEA2ga2qrLN+xarDZPZ\\nxqE/E/nPtlNaeYCPGy+M7STd5IpKT5K/ENWYzWwm6ZM1ZO376ar13/TzI7J2EljA08mDh8Lup4V/\\n0yrXvF1gtmKx2th3LIH1O85itakiy4zs24ihPRqUfXBCOIAkfyG
qqZzffiV+5Qq7MreGjQh56GFM\\n/j7M3LdAK6/rHcpTHR/HpQoNsmOx2ti09zxbD8QUu0yDmt60auDPXbc0xKCvmq0conqS5C9ENZMf\\neZ6YRfPtygJH3YffoMGk5afz1P5X7Ooea/sv2gS2KssQHabAbOWVz37BarNx7kKWXV1ooCfBfu7k\\nFVh48p520tmOqNIk+QtRTSiLhTOTHgV1uUnbyc+fhotfJT4/mbcPLSc+9/K7+01rNGJIw3Ca+TUp\\nj3BLTUxiNrt/i8dqVez5Lb5I/ZDu9bmnX9XsdVCI4kjyF6IasJlNnJ00wa6s8Yq3MHh5YbFZePnQ\\ncru6V3rPwdulcg+4Y8w38+n3pzlwIrFI3WMjwmjXOFCu7kW1JclfiCrKmpND7rHfydyzm7zTl59Y\\nDx4zlhr9+mvz03a9oE3P6f4MIR5BZRqnI7z86RHOxmXalU0d2ZYGdWvg42qQnvZEtSfJX4gqJv2H\\n7aRs3GD3ut4ltadOw6tte21+8/nvUBTeBohofnelT/xWm41HX91lV3Zrx1DG/NUBT1CQN8nJ2eUT\\nnBAViCR/IaoIpRTnpk/BlpOjlend3fHt0xevTp1xb3z53n1CbhILDi7V5ut6h9IntHIPsbvoP4c5\\nF3/5Ib5GtX14cWzncoxIiIpLkr8QlZiy2Qo75bFYiF28UCv3aBlG6LSn0BkK72n/nnycP05+gQ0b\\nBy4eLrKd5zpPLbOYS1NegYW9v1+0G1gHYMrINnRoWrlbMYRwJEn+QlRSxXXBW/PRx/Dp1p34nARO\\np59jw5mvr7q+t4sXc7rNwMPZw9Ghlrp9xy7yweaTRcofur0FfdrVLoeIhKhcJPkLUUmdf+5pbbpG\\n/3B0Tk743tofl6BgLDYLiw4ts1u+lmcIdzQaTE3PYALc/Ctlf/xKKZav/41jkZcHGqoV4EHHZkHc\\n0ashzk7SEY8Q10OSvxCVjLJYiFm8ULu3X2/2XNzqN9Dqt0fv4qtz/9PmH249hpoewdT2qlnWoZYa\\nm1Ks+/Es2w/H2pW/90w/nAyS8IW4UZL8hahkzj01FZvRCIB7s+Z2iT+zINsu8c/sMo263pW7Gfw/\\n2/5k16/2nfMM7VGfkX2lYx4hbpYkfyEqkcT/fKQl/loTJuHdtZtW994f/+G35GPa/Kr+r5Z5fKVF\\nKcWmvefZvC/arrxry2Am3hFW5QYWEqKsOTT5v/zyy/z222/odDpmzZpF27ZttboffviBt99+GxcX\\nF4YOHcqYMWM4ePAgTz75JE2bNgWgWbNmzJ4925EhClFpZB08QOae3QC4t2ipJf60/HQWHHwdk/Xy\\ne/0Le84qlxhLg9Vm46m3fibbaNbKOjUPYvJdbcoxKiGqFocl/0OHDhEdHc26des4d+4cs2bNYt26\\ndQDYbDYWLFjApk2bqFGjBo8++ijh4eEAdO3alTfffNNRYQlR6RTEXyB921Zt2F3Ptu0InTodm7Kx\\nI3Yvm85u0ZZtH9SGR9s8WF6h/mM//3GRD7dcfoq/brAXcx/qIlf6QpQyhyX//fv3awm9cePGZGZm\\nkpOTg5eXF+np6fj4+ODv7w9A9+7d2bdvH6GhoY4KR4hKKeHfH2hJ/5LaTzyJ1WZl6q7n7cqf7DCR\\nZn6V9z74lBV7yM23aPP/ur0FfdrWksQvhAM4LPmnpKQQFhamzfv7+5OcnIyXlxf+/v7k5uYSFRVF\\naGgoBw8epGvXroSGhnL27Fkee+wxMjMzeeKJJ+jVq5ejQhSiwkr99mtSv95kV1ZrwiS8unRle/Qu\\nvj6/VSvvVrMTQxsOIsDdr6zDLDXPvr1PS/xdWwYz4Y4w6X9fCAcqswf+1BXDiOp0Ol555RVmzZqF\\nt7c3derUAaBBgwY88cQT3H777cTGxjJ27Fi+//57XFxcrrntoCBvh8Yu5BiXhUvHOG7jV3aJ38Xf
\\nny4fvQ/Ac9+/TGT65dfdZvSaSNc67amsElJzefTlH7T5/p3rMn10R4fuU85lx5NjXPE5LPkHBweT\\nkpKizSclJREUdLm7za5du7J27VoAXn/9dUJDQwkJCWHIkCEA1KtXj8DAQBITE6lbt+419yUDdTiW\\nDIbieEFB3iRdTMd48gQX1nyilTf74GMA/oyJYc7+xVp5fZ+6PNt5ClA5z3+rzcbqTcc4eubyd8SE\\n4a3oHlbToZ9HzmXHk2PseKXx48phyb9Xr16sXLmSiIgIjh8/TnBwMF5el8cHf+SRR1iyZAnu7u7s\\n3LmThx56iG+++Ybk5GQefvhhkpOTSU1NJSQkxFEhClFh5JyP5Mz0GXZlTd//CJuy8cWZb9kd97NW\\n3r9uH0Y2HV7WIZaaF94/wMVUo13Ziim98fG8dgufEKL0OCz5d+zYkbCwMCIiItDpdLz00kts3LgR\\nb29vBg4cyL333sv48ePR6XRMmDABf39/+vfvz4wZM/jxxx8xm83MnTu3xCZ/ISozZbOR+vUm0rZ8\\nq5X53T4U/9uHkpafzpz9r9gtv6zvQlwNle//hMVqIy0rn5nvHrArv6NXA+7s06icohKi+tKpK2/G\\nV1LSxORY0oznGMpq5czEh+3Kmqx+D72LS5Gn+R9seS/da1Wu4WltSnE2LpNXPvulSF3XlsE8NqJ1\\nmcck57LjyTF2vArd7C+EuLaEf3+gTTea8DCGzr3Q6fWk5afzwbFPtbpXes/B28Xrapuo0J5Yvod8\\nk1Wb93J3xt/HlVH9mhDW0L8cIxNCSPIXoowpi4WYVxZREBUJFA7BW2voQJKTszmS+Cv/PbWRPEs+\\n7QLDGNNyVKUacvdUTDofbjlJSma+Vta+SSB39mlIvRB5AlyIikKSvxBl7MqBeVxq1sKnW3fyzPn8\\n58Q6DiYcwcXgwgMtRtGjVudK1cHNa/89ysnodG1eBwzsUpeIAU3LLyghxFVJ8heijCiLhfh3V2uJ\\nv/aUaXi1a09kZgyfHFpHYk4y9bzr8FDYaII9gkrYWsWRm2/mP9tOaYlfr9OxYmpvvNydyzkyIURx\\nJPkLUUbOPPaINu1SqzYebduyLepHtkRuRynFwHr9GNZoEE76yvHfMifPzNQ39tqVDexcl9HhcqUv\\nREVXOb5lhKjECuJiiZ57eXTK4AfH4d6rF8uOvE1kVjQ1XH2Z2uMhQvS1yzHKG7P3t3g+2vqnNh/i\\n507zen6S+IWoJCT5C+FApuQku8Rf67HJeHfuwpSdM7EpGwadgee7TqNhiGN7tvunjPlm4lONLF//\\nK24uTqRnF2h1r03qSYCvWzlGJ4S4UZL8hXAQZbEQ9fyz2nzTdz5A5+TEU7tfxKZsADzVaRJezp7l\\nFWKxLFYbv5xOZtvBGKIS7H+U5BUUvr7n7mrgrWm3VKqHEoUQhST5C+EAymq1u8cfOn0Gm2N+ZFvU\\nj1rZsIaDaOBTrzzCK1ZmronpK38qtr5Xm5oM7FxXXtsTopKT5C9EKUv8z8dk7tmlzQeOvJfvXKP4\\nMWqPVjas4SBubxhe9sFdQ+TFLBasOWxXdku72tzWtS61Aipe64QQ4uZJ8heiFOX+8btd4ncZfQ9r\\n/KI5GxOplb1+y3zcnCrWPfKv9p7nm5+jtPkVU3vj41H5xhAQQlwfSf5ClKILbywDwDk4hLjJd/L5\\nqU2Qcbn+rVuXVKh75CkZeTz7zn67sndn9MXZyVBOEQkhyoIkfyFKgVKK6Jde1OYDZj7LJyc+0ubH\\ntryPbrU6lUdoxTp0MpF3vj6uzTerW4Pn7u9QoX6cCCEcQ5K/EKXgzKMPadNeEaNYceIjEo1JdK/V\\nmTEtRlWohGpTilnvHSApPU8re3VSDwJ93csxKiFEWZLkL8Q/dOGtN7Rpt1F3sdr9N9KNGQyoewt3\\nNRlaYRJ/vsnCxt3n+eFInF
bm4qxn5ZN9pJlfiGpGkr8Q/0D0wnna6Hz6tmGs8viVnIJcRjS6nYH1\\n+1WYxK+U4vFle+zKxg5uTr/2oeUUkRCiPEnyF+ImxSxeqCV+GtRhdbtMTGYzo5vfTe/Q7uUb3N+8\\n+83le/tjBjWjX4dQ9BXkh4kQouxJ8hfiBimrlTMTH9bmbb7evN3LCjYY3/oBOga3LcfoitrzWzyH\\nTiYBENG/Cf071inniIQQ5U2SvxA36PzT07TpvM6teL9ZKs56AxPbjKOFf8UY2MZitfHhlpMcPJFo\\nVz6wS91yikgIUZFI8hfiBpyeMB5shf3y/9jFm2NNU/B08uDx9uMrTFe9aVn5zFi9z67Mz9uVpY/3\\nrDDPIAghypckfyGuk/H0KS3xR9d05ljTwlfjpneaRC3PkPIMTROXnMOcDw9p83ff0ohhPRuUX0BC\\niApJkr8Q1ylty7cAWHw9+ap/YV/3E9uMqxCJPy0rnx9/iWPrgRit7PXJvfDzdi3HqIQQFZUkfyGu\\ng81swnj8GAD/HlB4xd8puB1tg8LKM6yrNvEDrH7qFtxc5L+3EOLq5NtBiBLYCgo4O3miNp/npifY\\nI5BxrSLKMSr4/lAMn+84q827OOt5dFgr2jQKwMVZOu0RQhRPkr8Q15Cy8QvS/rdZm//3HQEAvNT9\\n2XKJx2K18e8tJznwt6f4lz7eE3+fijVSoBCi4pLkL0QxCmJj7RL/5j4+ZHsZWNTrhXKJ50xcBos/\\n/cWu7NaOoTwQ3gy9Xp7iF0JcP0n+QvyNNSeH5C/WkfXTXq3sjfuDAXj9lgW4OZX9Q3QrNvzG7+dS\\ntfl7b23C4G4V49VCIUTlI8lfiCsom41z056wK1s9KhCAWV2nl3nitylFZHyWlvj9vF1ZPKG73NMX\\nQvwjkvyF+IvNZOLs4xO0+VqTJjMzYz381TFOqFetMotFKcXUN/aSm2+xK399cq8yi0EIUXVJ8hfi\\nL4kf/1ubrj15KntrpEFmYeKf0WlymcVhsdqY8Nouu7IOTQN5YGCzMotBCFG1SfIXAsiPPE/2oQMA\\nWMbcxXPpn0N6YV2bwJY09K1fJnHEJeUw59+Xe+h7YGAzBnSSgXiEEKVLkr+o9qx5ecQsmq/Nr7L9\\nrE23D2rNI60fLJM4dh29wH++O6XNPzO6Ay3r+5XJvoUQ1Yskf1GtWfPyODdlkja/8r4gbfqNfi/j\\npC+7/yJXJv6V0/rg6eZcZvsWQlQv+vIOQIjyknv8mF3i57Gx2AyF9/hnd3u6zBK/1Wbjidd2aPP/\\nntlfEr8QwqHkyl9UO8pmI3rubEzxF7Sy0BnP8Wz8RwDU965LzTIarMemFI++ukubv7NPwzLZrxCi\\nepPkL6oVqzGXc1MvP7nvWq8+IVOn8vQvS7Sypzs9XmbxTLziqf5xg5vTt31ome1bCFF9SfIX1Yay\\n2ewSf/ADD/JDaC47r0j8/2o1GoPe8R3o/P11vmcf7EyLUB+H71cIIUDu+YtqIvvw/3FmwnhtvtHS\\nFeyqW8DOuJ+0sqc7PU6Xmh0cHotSyi7x39a1Ln3kil8IUYbkyl9UeZk/7bHvwGfqdJS3J9t/2QVA\\nLc8QXuj6FDpd2QyOs2lvpDa94JFuhAZ6lsl+hRDiEkn+okpTSmmJ3+DlTYPFr2Jwd2f1b5d/DLzY\\n7ekyjeno6WQA7uzdUBK/EKJcSPIXVVrWvssd9jRa9gY6vZ4sUzbHU/8E4Il2j5RpPAdOJHAhJReA\\n8M51y3TfQghxidzzF1WWKTGRxI8+AKBG/3B0ej0mq5nnf1qgLdPCv2mZxXM8Mo33vjkBgLeHMx5u\\n8ttbCFE+JPmLKsmWn0fUC89p84Gj7iWzIJvpu1/Qyl7rM6/M7vNn5pp4fd2v2vwbU/uUyX6FEOJq\\nSkz+Fy5cYOrUqTz4YGH/5uvXrycqKsrRcQnxj6R+87U23fi
NVeicnJn18+Ur/jndZuDh7F4msRw4\\nnsD0lZffKvjguVvLZL9CCFGcEpP/7NmzGTFiBEopABo2bMjs2bMdHpgQN8uWn0/699sAqPX4FAye\\nnjy95/I5+3KvFwnxDC6TWD79/hTvfXtCm1/4SDf0ZdTaIIQQxSkx+ZvNZgYMGKA1j3bp0sXhQQlx\\ns2wFBZx94jFt3qtde6KzYimwmgCY1PYhfF3LpjOdL3adY8cvhV0Ih/h7sPTxntSWp/uFEBXAdT1x\\nlJWVpSX/M2fOUFBQ4NCghLhZVyb+Os/MRGcw8OrhlQDodXpaB7YskzjWbj/ND0fitPnFE7qXyX6F\\nEOJ6lJj8J0+ezL333ktycjLDhw8nPT2d1157rSxiE+KGpG37H/x1eyp0+gw8mrcgNS9dq3+tz1yH\\nx6CU4u2vjnH4VOG7/HWCPJn/cDeH71cIIW5Eicm/VatWfPXVV5w+fRoXFxcaNmxIUlJSWcQmxHXL\\nj44i5Yv1AHh16IRnWGtyzUbm7F8MgA4dbk5uDo3BZlM88upObb5ZHV9mjunk0H0KIcTNuOY9f5vN\\nxuTJk3F1daV169Y0a9YMnU7H44+X3ahnQpREKUXMgrkAGLy9qT15CgBbI38oLNMZeO2WuQ6P49X/\\nHtWmB3Wpy3MPdHT4PoUQ4mYUe+W/efNmVq5cSXR0NC1btkSn06GUQq/X07t377KMUYhiWY1Gzk29\\n/GO00dIVACTmJrH7wj4C3QN4sdvTOOsd26HOV3vPczo2A4Axg5rRv2Mdh+5PCCH+iWK/EYcNG8aw\\nYcNYuXIlU6ZMsavLzs6+ro2//PLL/Pbbb+h0OmbNmkXbtm21uh9++IG3334bFxcXhg4dypgxY0pc\\nR4i/u/juam261sTH0RkKh+PdeHYLNmXjriZDHZ74N+05z7f7orR5SfxCiIquxG/FKVOmcPbsWdLT\\nCx+cMplMLFy4kK1bt15zvUOHDhEdHc26des4d+4cs2bNYt26dUDh7YQFCxawadMmatSowaOPPkp4\\neDgxMTHFriPE3xlP/Ynx+DGg8Ml+j+YtAPgz7QzHUk/StEYj2gWGOTSGX04n2yX+f8/s79D9CSFE\\naSgx+S9atIiffvqJlJQU6tWrR2xsLOPHjy9pNfbv3094eDgAjRs3JjMzk5ycHLy8vEhPT8fHxwd/\\nf38Aunfvzr59+4iNjS12HSGulB8VSdxrr2jzlxK/1WblyzPfokPHyKbDHdp975FTSazadEybl8Qv\\nhKgsSkz+v//+O1u3buXBBx/kk08+4dixY2zfvr3EDaekpBAWdvmqy9/fn+TkZLy8vPD39yc3N5eo\\nqChCQ0M5ePAgXbt2veY61xIU5F1iPOKfqUjH2FpQwIGF87T5nps2oNMXPru6/exe4nMTuLVhTzo2\\nauGwGP6MTrNL/BuXDMfZ6Z8NlVGRjnFVJsfZ8eQYV3wlJn8XFxegsKc/pRStW7dmyZIlN7yjS90D\\nA+h0Ol555RVmzZqFt7c3depc/R7pletcS3Ly9T2DIG5OUJB3hTrGKZu+1KabrHqXlNTCIXLzLHn8\\n9/evcTG4MLB2f4fG/Mybe7Xp957pR0Z67j/aXkU7xlWVHGfHk2PseKXx46rE5N+wYUM+++wzOnfu\\nzEMPPUTDhg2v64G/4OBgUlJStPmkpCSCgoK0+a5du7J27VoAXn/9dUJDQykoKLjmOkIoi4W0Ld8C\\nEDT6AfSurlrdd1E7yTHnMrzRYId24btlf5Q2veyJXjgZZHBMIUTlUuK31rx58xg6dChPPfUUI0eO\\npH79+rzzzjslbrhXr1589913ABw/fpzg4GC75vtHHnmE1NRUjEYjO3fupEePHiWuI6o3c0oyZx57\\nRJuv0ffy6HgpeansjN2Ln2sN+td13HC5Wbkmvtx9HoDwznWo4eVawhpCCFHxlHjl//LLL/PC
C4Vj\\noA8fPvy6N9yxY0fCwsKIiIhAp9Px0ksvsXHjRry9vRk4cCD33nsv48ePR6fTMWHCBPz9/fH39y+y\\njhAAlox0Imc+o83XnjwFndPl03fT2f9hUVbubDIEF4OzQ2LIzDXZDc17V59GDtmPEEI4WonJ32Aw\\nsH//fjp27Iiz8+UvVb2+5KbOGTNm2M23aHH5AaxBgwYxaNCgEtcRAuD8jOnadINFS3AJCdHmz6Sf\\n49fkP2joU59Owe0csn+rzWaX+J+6tx3uro7tP0AIIRylxG+vDRs2sGbNGm1eKYVOp+PkyZMODUyI\\nS9J/uPx2SeM3V2HwuDwsrk3Z+PLsZgDuaea4V/ve//aENr1iam98PFwcsh8hhCgLJSb/I0eOlEUc\\nQhQr+fPPAPDt288u8QMcTPiF2OwLdAnpSAOfeg7Zv1KKQycLB7MaHd5UEr8QotKTdktRoZkSErTp\\n4AfG2tXlWwr45txWnPXOjGg82CH7zzKamPbm5eb+gZ3rOmQ/QghRliT5iwrLZjYR9eJMAAy+NbSO\\nfC7ZHrOLLFM2tzcIx8+tRqnuOzPXxGv/PUp8yuX39x8a4rhOg4QQoixJ8hcV1tlJE7Tpei/av/mR\\nlp/OjzG78XXxYWD9fqW6X5PZavdwH8DSx3vi7+NWqvsRQojyUmLyz8zM5J133iE5OZmlS5eyY8cO\\n2rdvr/XLL4QjGE/9qU3XefZ5nP387Oq/PrcVs83CiMa342oo3Xvwb391udveGRHtadVAznUhRNVS\\n4vt6L774IrVq1SIuLg4oHNXvueeec3hgonq7NGiPk58/Hs2a29VFZkZzOPFX6nnXoUvNDqW73+Qc\\nfjuXCsBT97WTxC+EqJJKTP5paWmMHTtWe8d/8ODB5OfnOzwwUX0l/udjbbrh4lft6pRSfHGmsHvf\\nkU2Ho9eVXte6FquNOR8e0uZbNwwotW0LIURFcl33/M1ms/b+dEpKCkaj0aFBierLnJ5O5p5dANQI\\nH2jXix/A4cRficqKoUNwW5rUaFhq+z0Vk86StUe1+eVP9Cq1bQshREVTYvJ/4IEHuOeee0hOTuax\\nxx7jjz/+0Lr7FaI02UwmIp+53JNf0L2j7epNVhNfn9uKk96JOxsPKdV9v3dFJz6PDGuJr/TZL4So\\nwkpM/rfffjsdO3bk6NGjuLi4MH/+fIKDg8siNlHNZO7aoU03Wrq8yKt9P8bsJb0gg0H1byXQvfTu\\nxf96NoX07AIA3p3RD2cnGaVPCFG1lZj8+/bty7Bhw7jjjjvs+uYXorRl/rQHKOzMx6mG/dP9GQWZ\\nfB+9A28XLwbVv/Vqq98Um1K8+cXvAPh6ukjiF0JUCyV+061fv56goCBmz57NiBEj+PDDD0lMTCyL\\n2EQ1kvnTXkzx8QB4d+tepP7bc99hspkZ3ug23J1K7337t778Q5teOrlnqW1XCCEqshKTf82aNXno\\noYfYsGEDq1atIi4ujvDw8LKITVQjaZu/0aYNHh52dTFZcRxMOEKoVy161OpSavs05pv59WwKAE/c\\n3QbDdYxUKYQQVcF1Pe1/+vRpvvvuO77//ntq1KjBnDlzHB2XqEbyY6IxpyQD0OStd+zrLAUsOfwm\\nAHc0Glyqr/bN/ej/tMH5tdwAACAASURBVOmOzYJKbbtCCFHRlZj8Bw8ejLu7O8OGDeODDz4g5Ipx\\n1IUoDcnrP9em9W72Tfpbo37Qppv7NSm1fWblmkjJLOyvYkZE+1LbrhBCVAYlJv+33nqLJk1K70tX\\niL/L+/MkAA0WvmJX/nvycX6I2Q3A/c1H4mxwLrV9/ue7U9q09OInhKhuik3+06ZNY8WKFTz88MNa\\nBz9Q2MOaTqdj165dZRGfqOIy9+7Wpl1q1rSre/ePNdp015odS22fZouNX04X3maYcnebUtuuEEJU\\nFsUm/xdffBGAtWvXFq
nLy8tzXESi2rBkZJC45iMAfHr3savLNV/uRXJFv5dx1pfeAJT7jycAoAM6\\nyL1+IUQ1VOzTU4GBgQDMmTOH0NBQu38ysI8oDVn792nTwREP2NU9u3cuAEHuAaWa+G1K8fHWwhED\\nR90qt7OEENVTsd+q33zzDatWrSI+Pp5+/fpp5WazWfthIMTNsublkfLlegAC777H7kG/zee/16Yf\\nb/dwqe73wF9X/QADOtUp1W0LIURlUWzyv+OOOxg6dCgvvPACU6ZM0cr1er107yv+sXNTJmnTvn0v\\n99hnsVm0J/xvbxBOsEfp/dA8eyGTDzYXPlzYp20t6c1PCFFtFZv8T5w4QatWrRgxYgQxMTF2dVFR\\nUfTo0cPhwYmqyZRwUZuuO/MFDJ6e2vziQyu06SENS6czKZtNcSIqjWXrf9PKxt0uXVULIaqvYpP/\\nV199RatWrVi9enWROp1OJ8lf3LSYlxcC4Fq3Lu5NmtrVJRiTAHih61Ol1qHP7A8PcjH18gOEK6f1\\nQX/FGyxCCPH/7d13XFXlH8Dxz73AZQgOBBwMJw5AcFtqbi1nmSO1TIufK3GlJqFJ7jTLzLTUzJbm\\nKCs1t6U5cCOCSioqKooCTkDG5d7fHzcPkgMu3IuM7/v16tVzx/Oc5zwc/HLOec73KW6eGPyDgoIA\\n+OGHH7K8r9PpUEsaVJFL2rt30SUnAVDhnRFZPvvqxHKlXNE+62N/ubV802kl8Deo6czbnWpja226\\nCYRCCFEYZRvF161bx4oVK8jIyKBv3760bdv2sY//CZETDy/bq3HOnDtyX5tCeLzhfryvk7dJtqXX\\n69lzwnCLoU7VsgzvXkcCvxBCkIPgv3r1anr16sX27dvx9PRk586dbN68OT/6JooYvVZLwvrfgEfP\\n+hccX6qUh/gOMMn2xn+Z+SjhmN5+JmlTCCGKgmyDv7W1NRqNht27d9OxY0e55C9yLfFE5oQ7+7r1\\nlHK6Tkv03csABDYaZZJt/bbnPDfvpgIwUCb3CSFEFjmK5FOmTOHYsWM0btyY0NBQ0tLSzN0vUQTd\\n/8eQXMex68uoHvojcvSuIKXs7uCa5+3o9XrW77sIGB7pa+FXMc9tCiFEUZJt8J87dy6VKlXiq6++\\nwsLCgpiYGKZMmZIffRNFTFLYcQBsKlVW3jtzK0opT2z8rkm2c+9+ulJ+q1Ntk7QphBBFSbazn1xc\\nXPDx8WHXrl3s3r0bPz8/atWSy6jCOGk3bpAeb1hMx66WISCnZaQxP3QxAOXsnE02w//LXyMMbTra\\nmaQ9IYQoarI9858/fz5z5szhxo0bXL9+nenTp7N48eL86JsoInQp97kY9J7yWm1jg16vJ2jfDOW9\\n9xuPMcm2Eu6k8M/l2wD0bFnNJG0KIURRk+2Z/8GDB1m1apUy0U+r1fLGG28wZMgQs3dOFA3nRg5X\\nytUWfEmKNpWxf3+gvDe2wXCTLN6j0+uzzPCvX0PWoBBCiMfJ9sz/v0l9LC0tUUl2NJFD96POgU4H\\ngMfkKVjY2rLmzG/K512rvkTVUpVMsq2vfj+plL8Y3UKOUyGEeIJsT7d8fHwYOnQoTZs2BWD//v3U\\nqVPH7B0TRUP8r78AoLYrgY2HIcgfjD0KwDDft/BxMt2EvCORhtTA/+tSGzsbSeYjhBBPku2/kEFB\\nQWzevJmwsDBUKhXdunWjY8eO+dE3UcjpdTruRxqy9rmPDwTgRnK88rlX2Zom29bDS/U29algsnaF\\nEKIoyjb4q9VqPD09UalUqFQqatasKZdTRY6cH5/56J61uzsAUw7MUd4z1cI9J6LiWbLhlEnaEkKI\\n4iDb4D979mx27txJnTp10Ol0fPLJJ3Tp0oXRo0fnR/9EIZUaE0PGHcOs+4rDRwKG/P0PzG4ebJLt\\npGsz+GztCeX11++1Nkm7QghRlOVotv8ff/yBlZUVAGl
pafTp00eCv3iquFUrlLJ9vfoAnL55BoCS\\nGgfsNSVMsp0hc3cr5SXjW6FWy1UpIYTITrbXXZ2cnLC0zPwbwcrKClfXvKdgFUVb8mnDZfgqcz4B\\nDCl3d13eB0CP6l1Mso2oq3eU8vi+9bC0kHUnhBAiJ7I98y9Tpgw9evTgueeeQ6/Xc/jwYdzd3Zk/\\nfz4Ao0aZZiEWUXSkXbuqlC3LOAJw7EYYUXcu4OvkTcPy9Z5U1Sgzvjc8NVDKXkPtSmVM0qYQQhQH\\n2QZ/d3d33P+drAXQqlUrc/ZHFAFxP68BQPVvTojUjDTWnfsDS5UFr5rorP9i7F2lHDywkUnaFEKI\\n4iLb4B8QEJAf/RBFhF6nUxbwcRtrSOm7PfovbqfeoUOl1jjblTXJdg6eug6Ad+UylLa3NkmbQghR\\nXMhNUmEyer2es4PfVl7betYg4f5NdlzaTSlNSV6s1MYk27l5N4Wthy4D0KqezD8RQghjSfAXJnPv\\n0EGl7B5kyN2/7twfpOu0vFK9EzaWpjlDH7coM39/vRrOJmlTCCGKkxwF/1u3bhEeHg4Ycv0L8V96\\nnY7YpV8BULr9i9hWrcY/N89xPC6cqqUq0aicaSb5XbiWea//04BmqCXhlBBCGC3b4L9x40Zee+01\\n3n//fQCmTZvG2rVrzd4xUbhc+SQzc59T9x5k6DL4+ex6VKjo5fmyybJCTvvuCACOJa3lXr8QQuRS\\ntsF/+fLl/P7775QpY3iUasKECaxZs8bsHROFx/UV33P/n0gAyg30R63RsOfqAa4mxfJ8hUZ4lHQz\\nyXau3EhUyqN6+pmkTSGEKI6yDf4ODg7Y2toqr21sbJRsf0LEr/uZO3/9CYCmQkVKNX+BQ7HHWHvm\\nd2wtbehW7SWTbEen0zP5m0PKa3cXe5O0K4QQxVGOkvz8+uuvpKamcvLkSTZt2oSjo2N+9E0UcCkX\\nznNz00YASjZ7gfJv+QPw3alVAHTwaI2DxjRBeuuhS0r581EvmKRNIYQorrI9858yZQrh4eEkJSUx\\nadIkUlNTmT59en70TRRwl2ZMVcrlBhoe8TudcEZ5r0Nl0y2ys37/RQD6d6iBva1ceRJCiLzI9sy/\\nZMmSTJ48OVeNz5w5k7CwMFQqFUFBQfj6+iqfrVixgvXr16NWq/Hx8WHixImsW7eO+fPn4+HhAUDT\\npk0ZNmxYrrYtzEuv1ytlz6++Vib07bz8NwDl7VxMtq0N+y+SmpYBQLM6FUzWrhBCFFfZBv+WLVs+\\ndqb2rl27nlrv0KFDREdHs3r1aqKioggKCmL16tUAJCYmsmzZMrZt24alpSVvv/02x48bssJ16tSJ\\nCRMm5GJXRH7Sp6YqZdW/Cz8lpiUpK/f5+7xhku0cjrzBr3+fB6C0vQaNlYVJ2hVCiOIs2+C/cuVK\\npZyenk5ISAipD/3D/yQhISG0a9cOgGrVqnHnzh0SExOxt7fHysoKKysrkpOTsbOz4/79+5QqVSoP\\nuyHyW0r0RQDsfDKv5uy4lLm8bkX78ibZzvdbIpXypwHNTdKmEEIUd9kG//8u31u5cmX8/f0ZOHDg\\nU+vFx8fj7e2tvHZ0dCQuLg57e3usra0ZPnw47dq1w9rams6dO1OlShVCQ0M5dOgQ/v7+aLVaJkyY\\ngJeXV7Y74ezskO13RN78d4yPzzLkeihVyU35bPufuwCY3Gq0SX4ml6/fIylFC8CGT17Oc3sFnRzH\\n+UPG2fxkjAu+bIN/SEhIltexsbFcunTpCd9+sofvEScmJrJ48WK2bNmCvb09AwYMIDIyEj8/Pxwd\\nHWnVqhWhoaFMmDCBDRs2ZNt2XNw9o/sjcs7Z2eGRMU6+cgUATeOmxMXdI0OXoXzmoqpgkp/Jewv2\\nKOWi/jN+3BgL05NxNj8ZY/MzxR9X2Qb/RYsWKWWVSoW9vT1TpkzJtmEXFxfi4+OV1zdu
3MDZ2ZCH\\nPSoqCnd3d+WRwYYNGxIREUHPnj2pVq0aAPXq1ePmzZtkZGRgYSH3eQuSlEvRyj1/TYWKAGy+uBMA\\nFSqTZPNLSdOSeD8dgI+GPp/n9oQQQmTKNvgHBgZmuXyfU82aNWPBggX06dOHkydP4uLigr294Zlv\\nV1dXoqKiSElJwcbGhoiICFq2bMnSpUupUKECXbp04cyZMzg6OkrgL4ASjxlS7Fo6OaFSq0lMT2Lz\\nxR0A1HfxfVrVHPtk1XGl7FLa9infFEIIYaxsg//s2bP5/vvvjW64fv36eHt706dPH1QqFcHBwaxb\\ntw4HBwfat2+Pv78/b775JhYWFtSrV4+GDRvi5ubG+PHjWbVqFVqtlhkzZuRqp4T56FJSuLnRcCvG\\n6eXuAEzYk3klqGeNbnnextX4JKKuGhbwCXy9fp7bE0IIkZVK//DN+McIDAwkJiYGPz+/LGl9R40a\\nZfbO5ZTcXzKvh+/hXf3yCxKPGs78q3/xFZ+dXE7UnQsABD83Hhe7vC+x+/ZHfyrlbwLb5Lm9wkDu\\nk+YPGWfzkzE2v3y55+/m5oabm2kWZhGFW2rMFSXwVxg8DK2VWgn8vWq8bJLAv+PIZaW8cEyLPLcn\\nhBDiUU8M/uvXr6dbt24EBATkZ39EAZWRnER08CTDCwsLHBo3YdrBT5TPW7k1M8l2Vu44a2ivbkVs\\nrbP921QIIUQuPDG3/88//5yf/RAFXMy8zEBfff5C7qbdIzbpOgD/8+lvkm2cjr6llF/vUMMkbQoh\\nhHhUtgv7CAGGFfwAKn04DbWNDe/vnaZ8Vs+ljkm2sX6v4RaCm3MJLNRyaAohhLk88bpqaGgorVq1\\neuR9vV6PSqXKNre/KDqSLlxUytZu7iw/mZnyObDRaJNsQ6fX88/l2wCM7uVnkjaFEEI83hODv5eX\\nF59++ml+9kUUUMdHjwVA4+pGekY6R64bnsFv6dYUd4eKJtnG5z+fUMqOJW1M0qYQQojHe2Lw12g0\\nj+T1F8XPrR3blLL7+EBG7p6ovO5d4xWTbOPhR/tea1PdJG0KIYR4sifeWPX1NU2mNlG4xa0yXOIv\\n4VeXE8kXlPc/fM40yy7/uO0fpVypvAMvNvYwSbtCCCGe7InBf/z48fnZD1EA3fpzh1J2HTGafVcP\\nAlClZCWc7crmuf0bt+/z57EYABrUcCZ4YKM8tymEECJ7MqVaPNHt7YZL/mWbNQXg9M0zALzp1dsk\\n7Qd+lbli5PBXTfPEgBBCiOxJ8BePpc/IID3uBgDVA4Zx4U608pkpMvn9HXZVKQe90SDP7QkhhMg5\\nCf7ise7u26uULe3s2HLRMCmvrI2jSdo/dNqQIMinqiPV3UqZpE0hhBA5I8FfPNa9f5ftdeppuMQf\\nkXAagLe8++W57QydjlMXDdn8/DvVznN7QgghjCPBXzxWckQ4ACWbNWf7uT3K+x4OeX/8c9GvEUq5\\nlL11ntsTQghhHAn+4hH6jAylbOlQkqVHDY/7udpXwEJtkbe29XpCz8YD0LetZ57aEkIIkTsS/MUj\\nzg7xV8on4k4q5fdNkMp374lrSrl9I/c8tyeEEMJ4EvxFFnqdTim7vz+JxeHfAVDHyQuVSpXn9pdv\\njgTgpSaSzEcIIZ4VCf4ii9ToiwBYOjlhXbWq8v6QOgPy3PaVG4lKuWeranluTwghRO5I8BdZ3Nlr\\nmNxX8rmm7Lt6CABrS2uTnPVP/sbQXnXXUqhN0J4QQojckeAvFOk3E7iz+y8ANOXKs/H8VgA612hj\\n0u34d5HH+4QQ4lmS4C8UN1b+qJRLPt+UxPQkAHp6dcpz23eT0wCw0VhQroxdntsTQgiRexL8hSL5\\npOH5e/cJQRy5fhwAGwsbLC2euPJzjv317wI+TqVs8tyWEEKIvJHgLwDQa7Xo09MBsKpaleUnDc/2\\ne5etaZL2f99rWA
64qU8Fk7QnhBAi9yT4CwDuHshcYW9XzD6lPNC7b57b/mDZQaXcobE82y+EEM+a\\nBH+BXqfj+rfLACj7cnd+PfcHAK9W74JalbdDZOuhS8TEGeYO9GhZVWb5CyFEASDBXxC7bIlS3uiR\\n+Sx+S7emeWo34kICq/88B4BLaVs6P185T+0JIYQwDQn+gnsHDwDg9GpPDiWEAdDTsxuW6txP9EtK\\nSefT1WHK64+GPp+3TgohhDAZCf7FnF6vV8qhnpkz8Vu7N89TuyM+y1wJ8Mt3W+apLSGEEKYlwb+Y\\nS7t6VSnHam8D0NajRZ7a/OfSLaU89e3GWGvythKgEEII05LgX8zFrTI80mdXvwGhceGU0pTklWq5\\nT+qTodMxe2UoALU8SuPmYm+SfgohhDAdCf7FXPJpw5K9N5v7cF97n4bl6uZ6hr9Or2fQnF3K6+4t\\nqj75y0IIIZ4ZCf7FmF6rVcqH1IYMfA3L1811e3vCMm8hDH3ZG0+30rnvnBBCCLOR4F+MXZ4zCwB1\\niRJExJ+inJ0z7vauuW7vuy3/ANC2gRuNa5czSR+FEEKYngT/Ykqv15NyPgqAu52ak67T0rBc3Vwv\\n3fv3Q2f93V+oYpI+CiGEMA8J/sVUYugxpby/nCEDX8Ny9XLVVlp6Bt9ujgTAu3IZ7Gys8t5BIYQQ\\nZiPBv5iKXfoVADaNG/HPrXNUKumOi51Trtr6KzRGKY/u7WeS/gkhhDAfCf7F1IMV/C608UKn19Eo\\nl2f9gJLCt0vTylio5ZASQoiCTv6lLoYykgyX+VGpOHz7JCpU1HfxzVVbV+Iy1wJo18DNFN0TQghh\\nZhL8i6G7Bw3L91pW9uDC3UvULFOdUtYljW5Hr9czedkhADSWakqW0Ji0n0IIIcxDgn8xlHLOcJk+\\n1tMZgIblc3fJ/8zl20p5/sgX8t4xIYQQ+UKCfzF075BhFb+9TolYqi2p6+ydq3Z+3H4GgNqVykj+\\nfiGEKEQk+BczutRUpRzNTXzK1sbW0tbodm7cSiYmzjB34I0ONUzWPyGEEOYnwb+YiVuzKsvrRuWM\\nT+er0+sJXHxAeV2hbIk890sIIUT+keBfzNzZ/RcAfzU0rLbnXbaW0W18+M1hpfzFaLnXL4QQhY0E\\n/2JEr9cr5Yjqhkv9VhbGZePT6/XK43192npKNj8hhCiEJPgXI+k3ritlnVqFl2NNo9tYv++iUu7Q\\nyN0U3RJCCJHPJPgXI0kR4QD8U8kagBZuzxtVX5uh4/e9FwDDyn1CCCEKJwn+xUjSiTAAUjRq6rv4\\nUsfJy6j6mw9EK+W+bT1N2jchhBD5R4J/MaFLSSH5ZAQAR73sGOjV1+g2th66DMCQbt6o1blb+lcI\\nIcSzJ8G/mEgKP6GU75WwwEJtXFKem3dTSE7VAlCnalmT9k0IIUT+sjRn4zNnziQsLAyVSkVQUBC+\\nvpmLx6xYsYL169ejVqvx8fFh4sSJpKenExgYyNWrV7GwsGDWrFm4u8ukMlOI/WYpAMdq2WJtYXwO\\n/r0nrillOxuzHjZCCCHMzGxn/ocOHSI6OprVq1czY8YMZsyYoXyWmJjIsmXLWLFiBT/99BNRUVEc\\nP36cjRs3UrJkSX766SeGDh3KJ598Yq7uFSvXf/xeWcL3sFcJOlRqY3Qbfx67AsA7r/iYtG9CCCHy\\nn9mCf0hICO3atQOgWrVq3Llzh8REw/PhVlZWWFlZkZycjFar5f79+5QqVYqQkBDat28PQNOmTTl2\\n7Ji5ulds6PV67uz6E4AYZytSbNS0dTcuMc+Fq3e4m2z448GvulzyF0KIws5swT8+Pp4yZcoorx0d\\nHYmLiwPA2tqa4cOH065dO1q3bo2fnx9VqlQhPj4eR0dHQ8fUalQqFWlpaebqYrGQGp05Q//n9oaf\\nh7GJfX7bHaWUrSxlAR8hhCjs8u3m7cPZ5RITE1m
8eDFbtmzB3t6eAQMGEBkZ+dQ6T+Ps7GCyfhY1\\nMfsMz+VfcTEE/C412ho9Xn8eMczyX/J+O5ydJI+/uchxnD9knM1PxrjgM1vwd3FxIT4+Xnl948YN\\nnJ0N68dHRUXh7u6unOU3bNiQiIgIXFxciIuLo1atWqSnp6PX69Fosp+cFhd3zzw7UQQkRJ4FIKKa\\nIZ3v887P5Xq8LHQZMtZm4uzsIGObD2SczU/G2PxM8ceV2S77N2vWjK1btwJw8uRJXFxcsLc3LCbj\\n6upKVFQUKSkpAERERFC5cmWaNWvGli1bAPjrr79o0qSJubpXbNwL2Q/ADUdLmlZoTGnrUkbV3/Dv\\nlYOSJTSoVPJsvxBCFAVmO/OvX78+3t7e9OnTB5VKRXBwMOvWrcPBwYH27dvj7+/Pm2++iYWFBfXq\\n1aNhw4ZkZGSwf/9++vbti0aj4aOPPjJX94qFtOuxSvlWSQtauDU1uo2NIYY5A5LOVwghig6z3vMf\\nN25clte1amUuH9unTx/69OmT5fMHz/YL07i9czsAsWUtsVRb4u5Q0aj691O1pGt1AHRtWtnU3RNC\\nCPGMSIa/Iiw58jQA+/3saVrR+FsokdG3ANBYyQx/IYQoSiT4F2FpV68CcNXJCu+yxi/fe+qiIfh3\\nblbFpP0SQgjxbEnwL6Iefkwyw1KFj1Nto9vY+W9Wvybe5U3WLyGEEM+eBP8i6u7+fQBcrKChc5X2\\neWrLq4qjKbokhBCigJDgX0RdX/41AKkaFR0rtzO6/tINpwCw0VjII35CCFHESPAvghKPhyrl0y95\\n5yp4h5w0PCbYqq6ryfolhBCiYJC1WYugmzGGxDxhnra8Ubu30fWTU9KVcu821U3WLyGE6S1YMI9/\\n/jnNzZsJpKSkULGiKyVLlmLmzI/Nvu309HSGDfOnatVqODk54+LiwqBBb+W53aNHD7N8+VIuXYpm\\n/fqt3Lt3j48+msrt27fJyMjA0bEsEycGU6KEvQn2ItPff+9i//49BAZ+8NjPlyxZhIuLC6+80tOk\\n230WJPgXQSm/rgfgnntZKtobP1kv4LM9AJQqkX1qZSHEszVixBgANm3awPnzUQQEjM63bd+4cR29\\nXk9QUDBLliwyWbsNGjTCz68er77aGYBVq37E17cur732OgDffLOE7du38sorPUy2zeJGgn8RVqfF\\ny0bX2XLwklIe0s3blN0RQuSzRYs+5+TJcHS6DHr27Evbtu2ZOvUDypUrzz//nObGjesEB0+nSpVq\\nfPjhRG7duklaWhqDBg2jcePn+OmnH9m1aycALVu2oV+//kyd+gE2Njbcu3eP9PQ0Ll++xEcfTcPR\\nMXO57wUL5nHqVARarZZevfpSsmRJQkL2MmbMe2zevJE1a1ayfPlKrl+PZdasqXz22dP/cEhMvIda\\nnXmX+u23Bz/ynQ0bfiMi4gS3bt3kwoULDBnyDtu2beHSpYt8+OEMatXyeuz+nD37DzNmTKFUqVJU\\nrJh5m3Pt2lX8+ec2VCo1rVq1oXfvfnn6WRQ0EvyLmIgdP/PgfL2RWyOj66/96xwAVSo4UKtSmWy+\\nLYR42Jo/z3E48oZJ22xUyyVXt9+OHTvCrVs3WbhwKampKfj7v8kLL7QEICMjg08//YJfflnN1q2b\\nadOmHcnJSSxcuJS7d+9w+PBBrly5zPbtW1iy5FsA/P3707p1WwBKly7De+9N5MqVy0yd+gGBgR8o\\nZ/4hISFcuXKJL79cRnJyMgMG9GX58hUsX74UgPDwMEqWLE1ycjLh4WHUr98w233p0eM13n03gH37\\n9tCkyfO0bduB6tU9H/leTMwVFixYzG+//cKKFd+xbNmPrF//Kzt2bMPe3uGx+7N8+dcMHvwOTZs2\\nZ/bs6QBcuXKZvXv/5ssvv0Gn0zFkyFu0bm38xOmCTCb8FTGxh/cCkOBm3AI+AIdO
X+dBdoDA1+ub\\nsFdCiPwWHh5GeHgYAQGDGTt2JDpdBjdvJgDg51cPAGfnciQlJVKlSjXu3LnDtGmTOX48lNat2/HP\\nP5HUqeOLpaUllpaW1KnjS1SUYZVQL68nXxWMiIigbt0GANjZ2eHhUYnY2Guo1WpSU1OJj4+jWbPm\\nnD59kvDwMOrVa5Dtvnh4VGLVql8ZMmQ4aWmpjBw5lM2bNz7yvVq1vFCpVJQt60T16jVQq9U4OpYl\\nKSnxiftz8eJ5fHx8AZS+nDoVweXL0QQEDGbkyKGkpNzn2rWrRox+wSdn/kWMR9RtAGr0Nn7SzU87\\nDb/YZUtaY2UpKX2FMFbvNtULzCRZKysrunXrTr9+bz7ymYVF5u+3Xq/H1taWpUu/Izw8jD/+WE9I\\nyD4aNWqSJVlYeno6KpXhfNHS0uqJ21WpVFnqabXpqNUq6tTx5ciRQ9jbO+DlVYejRw9x7txZRox4\\nN0v9W7duUaZMGfR6vdLP1NQUrK1taNLkeZo0eZ7nn2/Ojz9+S8eOXZ64X//dx//268H+6PV65ZaC\\nTqdT9q9ZsxaMHTshS/sHDux/4n4XNnLmX4Tcv3RRKZf1qmtU3bT0DO4kpgEw7X+ylLIQhZ2Xlw/7\\n9u1Bp9ORkpLCZ5/NfeJ3IyNPsXPnNvz86jF+fBDnz5+jZs1aREScQKvVotVqiYw8hadnjWy3W6dO\\nHUJDjwCQlJTItWtXcXV1o27dBqxZ8xNeXt7UqFGT8PAwSpQogaVl5jloYmIi/v5vkJKSwsWL5/Hw\\nqAzAiBFDOXbsiPK9uLgbWe7P58ST9sfDoxKRkYa8Jg+2UatWbY4ePURqago6nY7PPvuYtLQ0o7ZX\\n0MmZfxFybdPvAPxT2Ybsf0Wz+n3vBaVso5HDQojCrm7d+vj4+DJkyFuAnh49XnvidytWdGXx4oX8\\n9tsvqFQqXn99AK6ubnTs2JURI4ag1+t55ZWeuLiUy3a7TZo0YcuWHQwfPgitVsvw4aOxtrbB17cu\\n778/lmHDAtBor6B0ygAAHQlJREFUNCQmJtKsWYssde3t7XnjjYG8844/VlYaxowZD8CkSR/y6adz\\nWLZsMWq1mlKlSjF2bKBR4/Gk/Rk48H989NF0XFxcKF++IunpaVSs6Er37r0YPnwwKpWKVq3aoNEU\\nraefVPqHr4MUUnFx9551FwqEM/8bCMD+HnUY2HGsUXWnfXeYC9fuMbx7HRrUdM7ymbOzg4yxmckY\\n5w8ZZ/OTMTY/Z2eHPLchl/2LiOsrflDKTRp1eco3H5WSpuXCNcMvq09VyeMvhBBFnQT/IuLOX4Zn\\nV09VtaFWWeMu+n/1+0mlbG0lE/2EEKKok+BfBOjSMyeiHGnhblQuf22GjhNRhsd/BnasZfK+CSGE\\nKHgk+BcBNzcY0vkm2qppUiH7Z2Yf9vBEvxd8K5i0X0IIIQomCf5FwK1dfwKGS/51XeoYVff6rfsA\\nvNWxlizdK4QQxYQE/yJAdz8ZgEPeJXB3MO7Z19MXbwJQU1L5CiFEsSEPdBdy+owMVP8+rNmhmnG5\\np/V6PUkpWgCcStqYumtCiHxw7dpV3nyzDzVrGubspKWl8frrA2jZsvUz61NsbCw3b8bj5eXD/Pmf\\n0KtXH6OT8jwsLu4GPXp0Yfr0ObRo0QowJOSZPDmQypWrApCRoWXo0JH4+dVl2bLFlC5d+qm5DYo7\\nCf6F3PUfvwPgnq2ahuWNy+q3/fBlpaxWyyV/IQorD49KfPHFEgDu3r3DW2+9znPPPY+19bP5o/7Y\\nscPcv5+Ml5cPo0YZl3PkcXbs2Iabmzs7d25Vgj8YEhlNnz4HMCzqM378KFau/CXP2ysOJPgXYnq9\\nnrt7/gbggG8JhpfIPvvWAzqdnlV/Glbwa+ZT3iz9E0Lkv5IlS1G2rBMJCQloNBpmzZr2b359NRMm\\nfED58uXp06
c7NWrUonHjJlhYWLJu3RosLa2oXr0GY8dOICBgMLVrexMZeYrU1FSmTp1F+fIVWLx4\\nISdOHEeny+DVV3vTvv1LxMZeY/r0YHQ6HeXLVyA4eBLffLMES0tLypUrz6pVKxg1ahyBge+ycuUv\\nWFtbExp6lLVrVzFp0ofMnDmFe/fukZGRwejR4x+7Wt/27VsYM+Y9PvwwiPv372Nra/vId1xd3UhK\\nSiIjIyM/hrnQk+BfiGXcvaOUE+pUMqru6n8DP0D/F2uarE9CFGfrzm0k9Ea4Sdus51KHV6vnPHHX\\ntWtXuXv3Di4u5fj445n06fM6jRo1ISRkL9999zUTJkzi6tUYZs6cS9Wq1RgwoA9z5nxGuXLl+eOP\\n9aSmpgCGPyIWLFjMzz+vYs2albRs2Ybr12NZuHApaWlpvP32G7Ro0YolSxbRp8/rNG/ekkWL5hMT\\nE0PHjl0oXbo0zZu3ZNWqFVhYqGnYsDFHjx6madPm7N27m1at2rJmzU80adKUrl1f4cKF88yfP5fP\\nPluUZX8uXbpIUlIijRo1oV69Buzdu5v27V96ZL9PnYrAxaVclgV9xJNJ8C/E7h05DMD5ihq6VX3R\\nqLqnog0T/Xq3ro5GEvsIUahdumRYfhZAo9EwadIULC0tiYg4waVL0Xz33TJ0Oh2lSxsm9trY2FK1\\najUA2rV7kaCg8bz4YkfatXtRuVXQqFFjAHx8fDlwYD/h4WGcPBmubEev1xEfH8+ZM5HKpf133hmF\\ns7MDmzdvf6SPLVu2Yd++v2natDkHDx7A338IH3zwPrdv32Lr1k0Ayh8eD9u+fStt23YAoH37l9i0\\naYMS/I8fP6b0p0SJEkyaNMUEo1k8SPAvxOJ+WgFATDkNHZyfvL7248TEJQHQoZG7yfslRHH1avUu\\nRp2lm8rD9/wfZmlpxbRps3FycsryvpVV5j/9/fu/Rfv2Hdm1awcjRw5j4UJDOw+Wt32wHK6VlRVd\\nurxM//5ZlwtXq9XodNkvEdOwYWMWLZpPVNQ5XF1dsbMrgZWVJWPGjMfHx/eJ9bZv34parWL//r3o\\ndBlcvRrDvXuGdOQP3/MXxpFH/Qop/b+/mADHatmiVuX8R3k/VauUZaKfEEWXl5cPe/bsAuDo0cNs\\n27Yly+c6nY7Fixfi5OREnz5v4ONTh9jYWADCwo4DEBERTuXKVbMsEZyamsq8eYagW6uWF8eOGa5C\\nfv31V+zfvx+1Wv3IvXeNRkO1ap6sXPk9rVq1Vfr399+G/l24cJ5Vq37MUuf06ZPY2dmxcuUvfPvt\\nSr7/fjVt2rRn9+6dphukYkqCfyGlS00FIMlGzbiGI4yq+92WSADKOFibvF9CiILD338we/bsYvjw\\nQSxfvhQfn6xJwNRqNXZ2JRgy5C1GjRqGSqXC09OwNsj167G8++4Itm/fQu/efalTx4969RowZMhb\\nBAQMombN2v9uYwjr1/9GQMBgrl2LoUmTJvj41GHFiu/Ztm1zlu21bNmGXbt20rx5SwB69nyNmJjL\\nvPPO/5g9ezp169bP8v3t27fQuXPXLO917tyNHTu2mXSciiNZ0reQunDoT9KXfM/FCho6THv0ct/T\\nvP2RISPgiFfrUK+GczbfliU684OMcf6Qcc6ZgIDBvPvue1StWt3oujLG5idL+hZjYdvXAKCxszeq\\n3pnLt5Wyb/WyJu2TEEKIwkEm/BVC2vQ0vC4YZsXWav2yUXU/WxsGQHPfClio5W8/IcSjHjd5UBQt\\n8q9/IXR1V+aknXKNX8hxPb1eT0qaYRJO37aPJtIQQghRPEjwL4TubjNMdrnauDoqI87eV24/C0DV\\niiWxtZaLPkIIUVxJ8C+ENLcSAUht2cioehdi7wLQqJaLyfskhBCi8JDgX8joUu4b/q+Ceu4NjKp7\\n/qoh+Lfwq2jyfgkhhCg8JPgXMon/Jt5I1ai
wt7LLcb3klHQALC1UcslfiCJkwYJ5BAQMpl+/Hrz6\\namcCAgYTFDT+WXcLgHnz5ihJg3JjyZJF/Pbbz4BhztL774/jt99Ms2rfw21np3v3TqT+m1vFXPJj\\nGw+TKFDI3N23F4BTVW3xsch5kp7NBy8BUNO9tFn6JYR4NkaMGAPApk0bOH8+ioCA0c+4R5nGjHnP\\nZG0tWbKIihUr8sorPUzWZnEmwb+QSb5+FYCT1WyxUOdsQZ6klHT+CIkGoFHtnC/7K4Qo3BYsmMep\\nUxFotVp69epLhw4vMWyYP3Xq+HLq1EnS09OZOnUW5cplLusdGXmKefM+xsrKCmtrG6ZOnUlGRgZT\\np35AcnIy9vYOTJkyk/T0dGbNylyO9913J1ClSlVeeuklKlWqStOmzdmw4TcCAz9g69ZNpKamEB19\\nkZiYK4wZ8x6NGz/H999/w59/7sDV1ZW0tDTeeOMt/PzqPrIf27dv4fz5c8ya9Yny3ooV37Fnz24y\\nMrQ0b96SAQP8iY2NZfLkQDQaDb6+dTl5MoL58xc9djsPW7Toc06eDEeny6Bnz760bdv+seMZG3uN\\niRPf4+OPPyM6+iJLl36JpaUl5ctX4L33JhIaepSff15FUlISAQGjmTr1A557rikREeGULl2a2bPn\\nkZycxMyZU0hMTPx33HKXTCmvJPgXNgm3AHCt7JXjKgvXZS4x2ty3gsm7JIQwiFu7Sllt01QcGjbC\\nuVcfo+sdPXqYK1cu8eWXy0hOTmbAgL688IIhrW7p0o588cUSVq9ewc8/r2b48FFKvY0b19Oz52u0\\nb/8Shw8fJCEhgT/+WE/Tpi/w6qu9WLnyB44ePUxk5Cl8fevRt+8bRESE88UX8/jkkwVER0czY8bH\\neHhUZsOG35R24+LimDv3c/bt28P69evw9KzJ77+vY+XKX7h37y59+776SFAGiIw8zZ9/7mDlyl9Q\\nP/R0k1ptwaJFXwPQq1c3evfux6pVP9Khw0v07NmHBQs+BeDWrVtP3c6xY0e4desmCxcuJTU1BX//\\nN3nhhZZoNJos/UhNTWXatMm8//5kypRx5N13R7BgwWIcHBxYsOBTdu/+k5IlS3H+/HlWrvwZlUrF\\nlSuX6dLlFUaOHIu/f38uXIjir7920qxZCzp16sq5c2dZuPBzPvnkc6N/vnklwb8Q0d42ZOfTWoCv\\ns0+O6pyLuUPkJUO9if0boFbJQj5CFAeRkaeoW9cwKdjOzg4Pj0rExFwBMpfr9fb25YcfvslSr0WL\\nVnz66Ryioy/Spk17PDwqceZMJO3aGZYN79evPwC//fYz//vfMAB8fOpw6ZLh6qKDgwMeHpUf6c+D\\nM3oXFxcSExO5cuUS1at7Ym1tjbW1s7JWwH+dORNJr159WLRoPpMnT1Pe12isGD78f1hYWHL37h3u\\n3btLdPQFOnY0rKrYrFkLzp07l+12wsPDCA8PU5YG1ukyuHkzgfLls54ozZkznVat2lK9uidxcTeI\\nibnC++8bljK+f/8+Tk4ulCxZCk/PGlhZWaHVanFwcFCWTn6w3+HhYYSE7GPTpg0AaLVangUJ/oXI\\n3QP7AbhcTkO9MtVyVGfuqlClXM21lFn6JYQwcO7VJ1dn6eagUql4eOkWrTZdWcUzcwlew3K9D2vc\\n+DmWLv2O/fv3MH36ZEaOHItabYFer8vyPUM9Qzt6vR6dzpBAzMrK6rH9sbDIvE2p1xvqPHwm/99+\\nPNCtW3defrkH48aN4o8/1tO5czdiYq7wyy9rWLbsR2xtbenXr4fSjwf7+KC97LZjZWVFt27d6dfv\\nzcdu/wEXl3Js3ryR7t17YmlpRbly5R/JhHj48MEsyyVbWGQNsXq9YXtjx06gdm3jlmE3NZntX4gk\\nnzoJwFkPa1zsnLL5tuGgT0s3/MLOH9ncrH0TQhQstWp5ERp6BICkpESuXbuK
q6sbAGFhhpOCiIgT\\nVK5cNUu9tWtXkZSUyIsvdqJnzz6cPXuG2rW9OHrU0Na6dWvZtm3zv0v5Gt47cSKMatVqGNW/ihVd\\niYo6h1ar5ebNBM6ciXzid1UqFRMnBvPtt8uIjr7I7du3cXQsi62tLadORRAXF0d6ejqurm5ERp4G\\n4MC/J0vZbefhpYpTUlL47LO5j+3DkCEBNGnyPN99t4wyZcqg1aZz6dJFANasWcn58+dytN8PL2N8\\n/vw51qz5KUf1TE3O/AuRe2cjsQASKmcf+MFwyf8BBzvNU74phChq6tdvyIED+xk+fBBarZbhw0dj\\nbW0DwLVrV3n33QCSkpKYMePjLPVcXd0IChqPg4MDGo2GoKAPsbCwYPr0YPbt+xt7e3uCg2eQkZHB\\nrFlTGDlyKHq9nrFjA43qn5OTM61atWXw4AFUqlSF2rW9sbB48vmoo2NZRo8eR3BwEIsWfY2FhSXD\\nhvlTt259unR5mU8+mc3o0WMJDg5ix45teHkZ2stuO3Xr1sfHx5chQ94C9PTo8doT+/DWW4MYPHgg\\nLVq0YsKESUybNhkrKw3Ozi50796LhISEbPe7V6++zJw5hXfe+R96vc6kT0QYQ5b0LST0GRmcHeIP\\nQMxkf1p7ZJ/T/8HSvdXdShH0hnEJgR4mS3San4xx/pBxhmHD/AkM/IBKlSqbpX1jxnjTpg106NAR\\nlUrFm2++xueff0XZsjk7uXmcqKhz3L+fjI+PL1u2/EFERDjjxgWafDvPmimW9JUz/0Ii4ZQhuc8t\\nB4scBf50beb9uTG9/MzWLyGEyK24uBsMGvQmVlYaOnbskueAbGtry9y5M1Gp1KjVaiZO/NAs2ykK\\n5My/kPhtxlC8LqRwzNuePmO+yP77e86zft9FAL4JbJOnbcvZkvnJGOcPGWfzkzE2P1Oc+cuEv0Lg\\n0r0reF1IAaB50545q3PdsPhP/w7GTcIRQghR9EnwLwQijmxXyq6NW+aozvFz8QD4VC1rlj4JIYQo\\nvCT4FwLp+w4aCr5eT3wW9mF3k9KUslMpG3N1SwghRCElwb+Au516h0qxhpWe3LrmbEGL4OWHALCy\\nVOfojwUhhBDFi1ln+8+cOZOwsDBUKhVBQUH4+voCcP36dcaNG6d87/Lly4wdO5b09HTmz5+Ph4cH\\nAE2bNmXYsGHm7GKBF5t0A9sUw5xM2/8k43gcnV7PnUTDmf+7vWWWvxBCiEeZLfgfOnSI6OhoVq9e\\nTVRUFEFBQaxevRqAcuXK8cMPPwCGvMb9+/enTZs2bN26lU6dOjFhwgRzdavQOX3zDNWtVFinP5qG\\n83G++i1CKdf0KGPOrgkhhCikzHbZPyQkhHbt2gFQrVo17ty5Q2Ji4iPf+/XXX3nxxRcpUaKEubpS\\nqMUmxmKdrkddyT3b7+r1eo78EwfAiB51zN01IYQQhZTZgn98fDxlymSeeTo6OhIXF/fI99auXUvP\\nnpmPrx06dAh/f38GDBjAqVOnzNW9QkGn15FwyZAv2jI1+5Wfdh+/qpTreTqbrV9CCCEKt3zL8Pe4\\nXEKhoaFUrVoVe3t7APz8/HB0dKRVq1aEhoYyYcIENmzYkG3bpkh4UFDN9/8C/HP23V4datGrQy2z\\n9KMoj3FBIWOcP2SczU/GuOAzW/B3cXEhPj5eeX3jxg2cnbOeje7atYvnn39eeV2tWjWqVTMsVVuv\\nXj1u3rxJRkZGlqUghRBCCJE3Zrvs36xZM7Zu3QrAyZMncXFxUc7wHwgPD6dWrcwz1aVLl7Jx40YA\\nzpw5g6OjowR+IYQQwsTMduZfv359vL296dOnDyqViuDgYNatW4eDgwPt27cHIC4ujrJlMzPQde3a\\nlfHjx7Nq1Sq0Wi0zZswwV/eEEEKIYqtILOwjhBBCiJyTDH9CCCFEMSPBXwghhChm8u1Rv9x4Unpg\\ngP379/
Ppp59iYWFBixYtGD58eLZ1xOMZO84HDx5k1KhReHp6AlCjRg0++OCDZ9X9QuFpY5yamsrk\\nyZM5e/Ys69aty1Ed8Shjx1iO49x52jgfOHCATz/9FLVaTZUqVZgxYwZqtVqOZSMZO8aHDx82/ljW\\nF1AHDx7UDx48WK/X6/Xnzp3T9+7dO8vnHTt21F+9elWfkZGh79u3r/7s2bPZ1hGPys04HzhwQD9i\\nxIhn0d1CKbsxnjp1qn758uX67t2757iOyCo3YyzHsfGyG+f27dvrr127ptfr9foRI0bod+3aJcey\\nkXIzxrk5lgvsZf+npQe+fPkypUqVokKFCqjValq2bElISEiOUwqLTLkZZ2Gc7I7LMWPGKJ/ntI7I\\nKjdjLIyX3TivW7eO8uXLA4asrrdu3ZJj2Ui5GePcKLDB/2npgePi4nB0dHzks5ymFBaZcjPOAOfO\\nnWPo0KH07duXffv25W+nC5nsjsv/5r/ISR2RVW7GGOQ4NlZOx/nGjRvs27ePli1byrFspNyMMRh/\\nLBfoe/4P0+fiicTc1CnucjJmlStXJiAggI4dO3L58mXefPNNtm3bhkajyYceFn5yLJufHMf543Hj\\nnJCQwNChQwkODs4SxJ5WRzxZTsY4N8dygT3zf1p64P9+dv36dVxcXHKUUlhklZtxLleuHJ06dUKl\\nUuHh4YGTkxPXr1/P974XFrk5LuVYNk5uxkuOY+NlN86JiYkMGjSI0aNH07x58xzVEVnlZoxzcywX\\n2OD/tPTAbm5uJCYmcuXKFbRaLX/99RfNmjXLUUphkVVuxnn9+vUsW7YMMNwaSEhIoFy5cs9sHwq6\\n3ByXciwbJzfjJcex8bIb548++ogBAwbQokWLHNcRWeVmjHNzLBfoDH9z587lyJEjSnrgU6dOKemB\\nDx8+zNy5cwHo0KED/v7+j63z8NoB4vGMHefExETGjRvH3bt3SU9PJyAgQLnvJB7vaWM8cuRIYmNj\\nOXv2LD4+PvTu3ZuuXbvKsWwkY8e4devWchznwpPGuXnz5jRq1Ih69eop3+3SpQuvvfaaHMtGMnaM\\nO3fubPSxXKCDvxBCCCFMr8Be9hdCCCGEeUjwF0IIIYoZCf5CCCFEMSPBXwghhChmJPgLIYQQxYwE\\nfyEKgCtXruDj40P//v2z/Hf69Okn1lmwYAHz5s3Lx14+2ZIlS9i1axcAGzZsQKfTAdC/f38yMjLy\\npQ+7d+/m9u3b+bItIQq7QpPeV4iiztHRkR9++OFZdyNXBg8erJQXLFhAx44dUavV+bo/3377LR9+\\n+CGlS5fOt20KUVhJ8BeigIuKiiI4OBgLCwsSExMZPXo0L7zwgvK5Vqtl0qRJXLhwAZVKRe3atQkO\\nDiYtLY2pU6cSHR1NUlISXbp04e23387S9rp169i+fTsqlYrr169TtWpVZs6ciZWVFYsWLWLXrl1Y\\nWlri6enJpEmTSEtLY+zYsdy9exetVkvr1q0ZNmwYgYGBNGjQgGvXrhEdHc3AgQP54osvaNKkCSEh\\nIXTq1Im///4bjUZDSkoKrVq1Ytu2bZw6dYqFCxei1+uxtLRk2rRpuLu7Z+ljmzZtlJzln3/+OfPn\\nz1dWlyxfvjwff/wxa9eu5ciRI4wbN45Zs2ah1WqZPXs2Wq2W9PR0Jk+ejJeXl/l/WEIUEhL8hSjg\\n4uPjGTVqFI0aNSI0NJRp06ZlCf5nzpwhLCyMzZs3A7BmzRru3bvH6tWrcXFxYfr06WRkZNC7d2+a\\nNm36SHa18PBwtm3bhq2tLW+88QZ///03jo6ObNu2jbVr12JlZcXIkSPZuHEj9vb2aLVaVq5ciU6n\\n44cfflAu8QOMHDmShQsX8u2332JpafjnpWTJktSvX589e/bQtm1bdu/eTePGjbGysiI4OJjVq1dT\\nunRpduzYwZw5c1iwYMEjY1C5cmXGjx+
PVqvF1taWlStXolar8ff3Z+/evfTr14+vv/6auXPnUqlS\\nJbp27crChQvx8PAgMjKSoKAg1q1bZ44fjxCFkgR/IQqImzdv0r9//yzvzZ8/H2dnZ+bMmcO8efNI\\nT09/5L52tWrVKFOmDIMGDaJ169Z07NgRBwcHDh48SGxsLIcPHwYgLS2NS5cuPRL869evj52dHQD1\\n6tUjKiqKy5cv06hRI6ysrABo3Lgx4eHhDB8+nM8//5xRo0bRsmVLevXqhVqd/dShrl27snXrVtq2\\nbcumTZvo1q0bZ8+eJS4ujhEjRgCQkZGBSqV6bP0H6UwtLS1Rq9X069cPS0tLzp8//8h65gkJCVy4\\ncIGJEycq7yUmJqLT6XLUVyGKAwn+QhQQT7rnP3bsWDp37kzPnj05c+YMQ4cOzfK5tbU1K1eu5OTJ\\nk/z111/07NmTn376CY1Gw/Dhw3nppZeeut2Hz9wfZPv+bxDW6/WoVCrKli3L77//TmhoKDt37qRH\\njx78+uuv2e5bmzZtmD17Nnfu3OH48eN8/PHHnD9/nooVK+ZoXsCDP0KOHj3KL7/8wi+//IKdnR0j\\nR4585LsajQYrK6tCO39CiPwgfwYLUcDFx8fj6ekJwKZNm0hLS8vyeXh4OL/++ive3t4EBATg7e3N\\nxYsXadCggXIrQKfTMWvWrMfOhg8LC+P+/fvo9XqOHTtGzZo1qVu3LgcPHiQ9PR2AkJAQ/Pz82Lt3\\nL7t27aJBgwa899572NnZkZCQkKU9lUqFVqvN8p61tTXPPfcc8+bNo3Xr1mg0GipXrsytW7c4c+YM\\nAIcPH2b16tVPHYuEhARcXV2xs7MjJiaG48ePK+PxYLsODg64ubmxe/duAC5cuMAXX3yRo7EWoriQ\\nM38hCri3336b9957Dzc3NwYOHMj27dv56KOPKFGiBAAeHh4sXLiQ1atXo9Fo8PDwoH79+vj5+XH2\\n7Flee+01MjIyaNWq1WNnwteoUYP333+fK1eu4OnpSfPmzbGwsKBz5868/vrrqNVqvL296dKlC9eu\\nXSMwMJCvv/4aCwsLmjdvjqura5b2XnjhBXr06MGXX36Z5f2uXbsyaNAgfvzxRwBsbGz4+OOPmThx\\nItbW1gBMnTr1qWPRrFkzvvnmG/r27YunpycjRoxg4cKFNGnShObNmzN06FBmz57N7NmzmT59OkuW\\nLEGr1RIYGJjr8ReiKJJV/YQoxtatW8f+/fuVZZuFEMWDXPYXQgghihk58xdCCCGKGTnzF0IIIYoZ\\nCf5CCCFEMSPBXwghhChmJPgLIYQQxYwEfyGEEKKYkeAvhBBCFDP/Bx/o1GuPpFCZAAAAAElFTkSu\\nQmCC\\n\",\n            \"text/plain\": [\n              \"<Figure size 576x396 with 1 Axes>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        }\n      ]\n    }\n  ]\n}\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_tfjs/run.local.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\n\npython -m tf_trainer.tf_hub_tfjs.run \\\n  --train_path=$train_path \\\n  --validate_path=$valid_path \\\n  --model_dir=\"tf_hub_tfjs_local_model_dir\" \\\n  --train_steps=9000 \\\n  --labels=toxicity,severe_toxicity,obscene,sexual_explicit,identity_attack,insult,threat\n"
  },
  {
    "path": "experiments/tf_trainer/tf_hub_tfjs/run.py",
    "content": "\"\"\"Experiments with Toxicity Dataset\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.tf_hub_tfjs import model as tf_hub_classifier\n\nimport pandas as pd\nimport tensorflow as tf\nimport tensorflow_hub as hub\nimport sentencepiece as spm\n\nFLAGS = tf.app.flags.FLAGS\n\n\nclass TFRecordWithSentencePiece(tfrecord_input.TFRecordInput):\n  \"\"\"Specialized setencepiece based input preprocessor.\"\"\"\n\n  def __init__(self, spm_path):\n    super().__init__()\n    self._sp = spm.SentencePieceProcessor()\n    self._sp.Load(spm_path)\n\n  def dense_ids(self, texts):\n    \"\"\"Pads sentences ids out to max length, filling with 0's.\"\"\"\n    return pd.DataFrame(\n        [self._sp.EncodeAsIds(x) for x in texts]).fillna(0).values.astype(int)\n\n  def pieces(self, feature_dict, label_dict):\n    \"\"\"Processes a batch of texts into sentence pieces.\"\"\"\n    text = feature_dict.pop('text')\n    sparse_ids = tf.contrib.layers.dense_to_sparse(\n        tf.py_func(self.dense_ids, [text], tf.int64))\n    feature_dict['values'] = sparse_ids.values\n    feature_dict['indices'] = sparse_ids.indices\n    feature_dict['dense_shape'] = sparse_ids.dense_shape\n    return feature_dict, label_dict\n\n  def _input_fn_from_file(self, filepath: str):\n    filenames_dataset = tf.data.Dataset.list_files(filepath)\n    dataset = tf.data.TFRecordDataset(\n        filenames_dataset)  # type: tf.data.TFRecordDataset\n    # Use parent class parsing to obtain text features, and processed labels.\n    parsed_dataset = dataset.map(self._read_tf_example)\n    return parsed_dataset.batch(self._batch_size).map(\n        self.pieces).prefetch(self._num_prefetch)\n\n\ndef main(argv):\n  del argv  # unused\n\n  module 
= hub.Module(FLAGS.model_spec)\n  with tf.Session() as sess:\n    spm_path = sess.run(module(signature='spm_path'))\n\n  dataset = TFRecordWithSentencePiece(spm_path)\n  model = tf_hub_classifier.TFHubClassifierModel(dataset.labels())\n\n  trainer = model_trainer.ModelTrainer(dataset, model)\n  trainer.train_with_eval()\n\n  values = tf.placeholder(tf.int64, shape=[None], name='values')\n  indices = tf.placeholder(tf.int64, shape=[None, 2], name='indices')\n  dense_shape = tf.placeholder(tf.int64, shape=[None], name='dense_shape')\n  serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({\n    'values': values,\n    'indices': indices,\n    'dense_shape': dense_shape\n  })\n  trainer.export(serving_input_fn, None)\n\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_kona_prototypical_network/proto.py",
    "content": "import numpy as np\nimport tensorflow as tf\nimport tensorflow_hub as hub\nimport pandas as pd\nimport sys\nimport datetime\nimport collections\n\ntf.app.flags.DEFINE_string(\n    \"train_file\",\n    \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/train_cleaned_text.csv\",\n    \"CSV file containing the training data. Expects columns: domain, label, support_or_query\"\n)\ntf.app.flags.DEFINE_string(\n    \"validation_file\",\n    \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/validation_cleaned_text.csv\",\n    \"CSV file containing the validation data. Expects columns: domain, label, support_or_query\"\n)\ntf.app.flags.DEFINE_string(\n    \"test_file\",\n    \"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/test_cleaned_text.csv\",\n    \"CSV file containing the test data. Expects columns: domain, label, support_or_query\"\n)\ntf.app.flags.DEFINE_boolean(\n    \"test_mode\", False,\n    \"If true then no training occurs and it prints out metrics on the test set.\"\n)\ntf.app.flags.DEFINE_string(\"model_dir\", \"\", \"The model directory in GCS.\")\ntf.app.flags.DEFINE_string(\n    \"encoding_layers\", \"256,128\",\n    \"Comma delimited integers representing the number of units for each dense layer.\"\n)\n\nFLAGS = tf.app.flags.FLAGS\n\n\ndef distance(embeddings, prototype):\n  return tf.map_fn(tf.norm, embeddings - prototype)\n\n\ndef neg_distance(embs, proto):\n  return -distance(embs, proto)\n\n\ndef calculate_logits(embeddings, positive_prototype, negative_prototype):\n  negative_logits = neg_distance(embeddings, negative_prototype)\n  positive_logits = neg_distance(embeddings, positive_prototype)\n  return tf.stack([negative_logits, positive_logits], axis=1)\n\n\ndef prepare_dataset(data):\n  data[\"text\"] = data.text.fillna(\"\")\n  domains = data.domain.unique()\n\n  positive_supports = []\n  
positive_queries = []\n  negative_supports = []\n  negative_queries = []\n\n  for domain in domains:\n    domain_data = data[data[\"domain\"] == domain]\n    positive = domain_data[domain_data[\"label\"] == 1]\n    negative = domain_data[domain_data[\"label\"] == 0]\n    positive_support = positive[positive[\"support_or_query\"] == \"support\"].text\n    positive_query = positive[positive[\"support_or_query\"] == \"query\"].text\n    negative_support = negative[negative[\"support_or_query\"] == \"support\"].text\n    negative_query = negative[negative[\"support_or_query\"] == \"query\"].text\n\n    positive_supports.append(positive_support)\n    positive_queries.append(positive_query)\n    negative_supports.append(negative_support)\n    negative_queries.append(negative_query)\n\n  return tf.data.Dataset.from_tensor_slices({\n      \"positive_supports\": np.array(positive_supports),\n      \"negative_supports\": np.array(negative_supports),\n      \"positive_queries\": np.array(positive_queries),\n      \"negative_queries\": np.array(negative_queries)\n  })\n\n\ndef encoder(dense_config, output_types, output_shapes):\n  \"\"\"Tensorflow graph for getting prototypes and embeddings.\n\n  It contains a placeholder for a tensorflow Iterator called \"handle\" whose\n  elements are a dict containing negative_supports, positive_supports,\n  negative_queries, and positive_queries. 
All of these are lists of strings.\n\n  Args:\n    dense_config: A list of integers that configure the dense layers.\n    output_types: A dictionary from output name to its tf type.\n    output_shapes: A dictionary from output name to its shape.\n\n  Returns:\n    A tuple (handle, negative_logits, positive_logits): the string-handle\n    placeholder for the episode iterator, followed by the logits for the\n    negative query set and the logits for the positive query set.\n  \"\"\"\n\n  if not dense_config:\n    raise ValueError(\"encoder must be called with a non empty dense_config\")\n\n  embed = hub.Module(\n      \"https://tfhub.dev/google/universal-sentence-encoder-large/3\")\n  dense_layers = [\n      tf.keras.layers.Dense(units, activation=tf.nn.relu)\n      for units in dense_config\n  ]\n  last_layer = tf.keras.layers.Dense(dense_config[-1], activation=None)\n\n  def get_embeddings(texts):\n    result = embed(texts)\n    for dense_layer in dense_layers:\n      result = dense_layer(result)\n    return last_layer(result)\n\n  get_prototype = lambda texts: tf.reduce_mean(get_embeddings(texts), 0)\n\n  handle = tf.placeholder(tf.string, shape=[])\n  iterator = tf.data.Iterator.from_string_handle(handle, output_types,\n                                                 output_shapes)\n  episode_batch = iterator.get_next()\n\n  with tf.variable_scope(\"negative_prototype\"):\n    negative_prototype = get_prototype(episode_batch[\"negative_supports\"])\n  with tf.variable_scope(\"positive_prototype\"):\n    positive_prototype = get_prototype(episode_batch[\"positive_supports\"])\n  with tf.variable_scope(\"negative_embeddings\"):\n    negative_embeddings = get_embeddings(episode_batch[\"negative_queries\"])\n  with tf.variable_scope(\"positive_embeddings\"):\n    positive_embeddings = get_embeddings(episode_batch[\"positive_queries\"])\n\n  negative_logits = calculate_logits(negative_embeddings, positive_prototype,\n                                     negative_prototype)\n  positive_logits = calculate_logits(positive_embeddings, 
positive_prototype,\n                                     negative_prototype)\n\n  return handle, negative_logits, positive_logits\n\n\ndef train_operation(negative_logits, positive_logits):\n  negative_loss = tf.losses.softmax_cross_entropy(\n      tf.broadcast_to(tf.one_hot(0, 2), tf.shape(negative_logits)),\n      negative_logits)\n  positive_loss = tf.losses.softmax_cross_entropy(\n      tf.broadcast_to(tf.one_hot(1, 2), tf.shape(positive_logits)),\n      positive_logits)\n  loss = negative_loss + positive_loss\n\n  optimizer = tf.train.AdamOptimizer(0.001)\n  train = optimizer.minimize(loss)\n  return (train, loss)\n\n\ndef predictions_and_metrics(negative_logits, positive_logits):\n  predict = lambda logits: tf.argmax(logits, axis=1)\n\n  negative_predictions = predict(negative_logits)\n  negative_labels = tf.fill(tf.shape(negative_predictions), 0)\n  positive_predictions = predict(positive_logits)\n  positive_labels = tf.fill(tf.shape(positive_predictions), 1)\n\n  probability = tf.nn.softmax(\n      tf.concat([negative_logits, positive_logits], -2), axis=-1)\n  labels = tf.concat([negative_labels, positive_labels], -1)\n  predictions = tf.concat([negative_predictions, positive_predictions], -1)\n\n  acc_op, update_acc_op = tf.metrics.accuracy(labels, predictions)\n  auc_op, update_auc_op = tf.metrics.auc(labels,\n                                         tf.gather(probability, 1, axis=-1))\n  return (predictions, acc_op, auc_op, update_acc_op, update_auc_op)\n\n\ndef main():\n  if FLAGS.model_dir:\n    model_dir = FLAGS.model_dir\n  else:\n    st = datetime.datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\")\n    model_dir = \"gs://conversationai-models/jjtan/transfer_learning/model/\" + st\n  print(\"Model dir: \" + model_dir)\n  save_path = model_dir + \"/save/model.ckpt\"\n  metadata_path = model_dir + \"/meta.txt\"\n\n  with tf.gfile.Open(metadata_path, \"w\") as f:\n    f.write(\"Encoding Layers: \" + FLAGS.encoding_layers + \"\\n\")\n\n  # TODO(jjtan): 
Convert to flags.\n  output_types = {\n      \"negative_queries\": tf.string,\n      \"negative_supports\": tf.string,\n      \"positive_queries\": tf.string,\n      \"positive_supports\": tf.string\n  }\n  output_shapes = {\n      \"negative_queries\": tf.TensorShape([tf.Dimension(12)]),\n      \"negative_supports\": tf.TensorShape([tf.Dimension(8)]),\n      \"positive_queries\": tf.TensorShape([tf.Dimension(12)]),\n      \"positive_supports\": tf.TensorShape([tf.Dimension(8)])\n  }\n\n  with tf.variable_scope(\"encoder\"):\n    encoding_units = [int(units) for units in FLAGS.encoding_layers.split(\",\")]\n    handle, negative_logits, positive_logits = encoder(encoding_units,\n                                                       output_types,\n                                                       output_shapes)\n\n  if FLAGS.test_mode:\n    print(\"In TEST mode.\")\n    with tf.gfile.Open(FLAGS.test_file, \"r\") as f:\n      test_df = pd.read_csv(f)\n      print(\"Test Dataframe Shape: \" + str(test_df.shape))\n      test_ds = prepare_dataset(test_df).shuffle(64)\n\n    # Test specific model components.\n    with tf.variable_scope(\"test_predictions_and_metrics\"):\n      _, acc_op, auc_op, update_acc_op, update_auc_op = predictions_and_metrics(\n          negative_logits, positive_logits)\n\n    saver = tf.train.Saver()\n\n    test_itr = test_ds.make_one_shot_iterator()\n    with tf.Session() as sess:\n      sess.run(tf.global_variables_initializer())\n      sess.run(tf.tables_initializer())\n      sess.run(tf.initializers.local_variables())\n\n      checkpoint = tf.train.latest_checkpoint(model_dir + \"/save\")\n      saver.restore(sess, checkpoint)\n      test_itr_handle = sess.run(test_itr.string_handle())\n      while True:\n        try:\n          _, _ = sess.run([update_acc_op, update_auc_op],\n                          feed_dict={handle: test_itr_handle})\n        except tf.errors.OutOfRangeError:\n          break\n      test_acc, test_auc = 
sess.run([acc_op, auc_op])\n      print(\"TEST ACCURACY: \" + str(test_acc))\n      print(\"TEST AUC: \" + str(test_auc))\n  else:\n    print(\"In TRAINING mode.\")\n\n    with tf.gfile.Open(FLAGS.train_file, \"r\") as f:\n      train_df = pd.read_csv(f)\n      print(\"Train Dataframe Shape: \" + str(train_df.shape))\n      train_dataset = prepare_dataset(train_df).shuffle(128).repeat()\n\n    with tf.gfile.Open(FLAGS.validation_file, \"r\") as f:\n      validation_df = pd.read_csv(f)\n      print(\"Validation Dataframe Shape: \" + str(validation_df.shape))\n      validation_dataset = prepare_dataset(validation_df).shuffle(64)\n\n    # Training specific model components.\n    with tf.variable_scope(\"training_operations\"):\n      train_op, loss_op = train_operation(negative_logits, positive_logits)\n    with tf.variable_scope(\"train_predictions_and_metrics\"):\n      _, train_acc_op, train_auc_op, train_update_acc_op, train_update_auc_op = predictions_and_metrics(\n          negative_logits, positive_logits)\n    with tf.variable_scope(\"validation_predictions_and_metrics\"):\n      _, val_acc_op, val_auc_op, val_update_acc_op, val_update_auc_op = predictions_and_metrics(\n          negative_logits, positive_logits)\n\n    saver = tf.train.Saver()\n\n    with tf.Session() as sess:\n      sess.run(tf.global_variables_initializer())\n      sess.run(tf.tables_initializer())\n      sess.run(tf.initializers.local_variables())\n\n      train_writer = tf.summary.FileWriter(model_dir + \"/train\", sess.graph)\n      validation_writer = tf.summary.FileWriter(model_dir + \"/validation\",\n                                                sess.graph)\n\n      training_iterator = train_dataset.make_one_shot_iterator()\n      validation_iterator = validation_dataset.make_initializable_iterator()\n      training_handle = sess.run(training_iterator.string_handle())\n      validation_handle = sess.run(validation_iterator.string_handle())\n\n      best_auc = 0\n      for batch_num 
in range(500):\n        print(\"Batch: \" + str(batch_num))\n\n        batch_size = 32\n        for i in range(batch_size):\n          _, loss, train_acc, train_auc = sess.run(\n              [train_op, loss_op, train_update_acc_op, train_update_auc_op],\n              feed_dict={handle: training_handle})\n\n          training_summary = tf.Summary(value=[\n              tf.Summary.Value(tag=\"loss\", simple_value=loss),\n              tf.Summary.Value(tag=\"accuracy\", simple_value=train_acc),\n              tf.Summary.Value(tag=\"auc\", simple_value=train_auc),\n          ])\n          train_writer.add_summary(training_summary, batch_num * batch_size + i)\n          train_writer.flush()\n\n        recent_aucs = collections.deque([], 3)\n\n        sess.run(validation_iterator.initializer)\n        for _ in range(32):\n          _, _ = sess.run([val_update_acc_op, val_update_auc_op],\n                          feed_dict={handle: validation_handle})\n        val_acc, val_auc = sess.run([val_acc_op, val_auc_op])\n\n        # Save best version\n        if val_auc > best_auc:\n          best_auc = val_auc\n          saved_path = saver.save(\n              sess, save_path, global_step=(batch_num + 1) * batch_size)\n\n        # Early stopping\n        if len(recent_aucs) >= 3 and all(\n            val_auc < prev_auc for prev_auc in recent_aucs):\n          break\n        recent_aucs.append(val_auc)\n\n        validation_summary = tf.Summary(value=[\n            tf.Summary.Value(tag=\"accuracy\", simple_value=val_acc),\n            tf.Summary.Value(tag=\"auc\", simple_value=val_auc),\n        ])\n        validation_writer.add_summary(validation_summary.SerializeToString(),\n                                      (batch_num + 1) * batch_size)\n        validation_writer.flush()\n\n\nif __name__ == \"__main__\":\n  main()\n"
  },
  {
    "path": "experiments/tf_trainer/tf_word_label_embedding/__init__.py",
    "content": ""
  },
  {
    "path": "experiments/tf_trainer/tf_word_label_embedding/hparam_config.yaml",
    "content": "trainingInput:\n  ## BASIC_GPU uses a single NVIDIA Tesla K80 GPU.\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc/frac_neg\n    maxTrials: 20\n    maxParallelTrials: 3\n    enableTrialEarlyStopping: TRUE\n    params:\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.0000005\n        maxValue: 0.01\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 1\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n        - 128\n        - 256\n      - parameterName: dense_units\n        type: CATEGORICAL\n        categoricalValues:\n        - '128'\n        - '128,128'\n        - '128,128,128'\n        - '64'\n        - '64,64'\n        - '64,64,64'\n"
  },
  {
    "path": "experiments/tf_trainer/tf_word_label_embedding/model.py",
    "content": "\"\"\"Tensorflow Estimator implementation of Word Label Embeddings.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nimport numpy as np\nfrom tf_trainer.common import base_model\nfrom typing import Set\n\nFLAGS = tf.app.flags.FLAGS\n\n# Hyperparameters\ntf.app.flags.DEFINE_float('learning_rate', 0.000003,\n                          'The learning rate to use during training.')\ntf.app.flags.DEFINE_integer('embedding_size', 100,\n                            'The number of dimensions in the word embedding.')\n# This would normally just be a multi_integer, but we use string due to\n# constraints with ML Engine hyperparameter tuning.\ntf.app.flags.DEFINE_string(\n    'dense_units', '128',\n    'Comma delimited string for the number of hidden units in the dense layer.')\n\n\nclass TFWordLabelEmbeddingModel(base_model.BaseModel):\n\n  def __init__(self, target_label: str) -> None:\n    assert len(target_label) == 1  # Only single feature supported.\n    self._target_label = target_label[0]\n\n  @staticmethod\n  def hparams():\n    dense_units = [int(units) for units in FLAGS.dense_units.split(',')]\n    hparams = tf.contrib.training.HParams(\n        learning_rate=FLAGS.learning_rate,\n        embedding_size=FLAGS.embedding_size,\n        dense_units=dense_units)\n    return hparams\n\n  def estimator(self, model_dir):\n    estimator = tf.estimator.Estimator(\n        model_fn=self._model_fn,\n        params=self.hparams(),\n        config=tf.estimator.RunConfig(model_dir=model_dir))\n    return estimator\n\n  def _model_fn(self, features, labels, mode, params, config):\n    word_emb_seq = features[base_model.TOKENS_FEATURE_KEY]\n\n    # Constants\n\n    labels = labels[self._target_label]\n\n    # Class emb\n    class_emb_initializer = tf.random_normal_initializer(\n        mean=0.0, stddev=1.0, dtype=tf.float32)\n    class_embs = tf.get_variable(\n        
'class_embs', [2, params.embedding_size],\n        initializer=class_emb_initializer)\n\n    word_emb_seq_norm = tf.nn.l2_normalize(word_emb_seq, axis=-1)\n    class_embs_norm = tf.nn.l2_normalize(class_embs, axis=-1)\n\n    cosine_distance = tf.contrib.keras.backend.dot(\n        word_emb_seq_norm, tf.transpose(class_embs_norm))\n    cosine_distance = tf.expand_dims(cosine_distance, axis=-1)\n    cosine_distance = tf.contrib.layers.conv2d(\n        cosine_distance,\n        num_outputs=1,\n        kernel_size=[5, 1],\n        padding='SAME',\n        activation_fn=tf.nn.relu)\n    cosine_distance = tf.squeeze(cosine_distance, axis=-1)\n\n    max_cosine_distance = tf.reduce_max(cosine_distance, axis=-1)\n    attention = tf.nn.softmax(max_cosine_distance, axis=-1)\n    attention = tf.expand_dims(attention, axis=-1)\n\n    weighted_word_emb = tf.reduce_sum(word_emb_seq * attention, axis=1)\n\n    f2 = []\n    for num_units in params.dense_units:\n      f2.append(tf.layers.Dense(units=num_units, activation=tf.nn.relu))\n    f2.append(tf.layers.Dense(units=1, activation=None))\n\n    logits = weighted_word_emb\n    for layer in f2:\n      logits = layer(logits)\n\n    class_zero_logits = tf.expand_dims(class_embs[0, :], 0)\n    for layer in f2:\n      class_zero_logits = layer(class_zero_logits)\n    class_zero_reg = tf.nn.sigmoid_cross_entropy_with_logits(\n        labels=[[0.0]], logits=class_zero_logits)\n\n    class_one_logits = tf.expand_dims(class_embs[1, :], 0)\n    for layer in f2:\n      class_one_logits = layer(class_one_logits)\n    class_one_reg = tf.nn.sigmoid_cross_entropy_with_logits(\n        labels=[[1.0]], logits=class_one_logits)\n\n    loss = tf.nn.sigmoid_cross_entropy_with_logits(\n        labels=labels, logits=logits) + class_zero_reg + class_one_reg\n    head = tf.contrib.estimator.binary_classification_head(\n        name=self._target_label, loss_fn=lambda labels, logits: loss)\n\n    optimizer = 
tf.train.AdamOptimizer(learning_rate=params.learning_rate)\n    return head.create_estimator_spec(\n        features=features,\n        labels=labels,\n        mode=mode,\n        logits=logits,\n        optimizer=optimizer)\n"
  },
  {
    "path": "experiments/tf_trainer/tf_word_label_embedding/run.hyperparameter.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_word_label_embedding\"\nMODEL_NAME_DATA=\"${MODEL_NAME}_$1\"\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME_DATA}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.10 \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --verbosity=debug \\\n    --config=\"tf_trainer/${MODEL_NAME}/hparam_config_$1.yaml\" \\\n    -- \\\n    --train_path=$train_path \\\n    --validate_path=$valid_path \\\n    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt\" \\\n    --embedding_size=300 \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \\\n    --is_embedding_trainable=False \\\n    --train_steps=$train_steps \\\n    --eval_period=$eval_period \\\n    --eval_steps=$eval_steps \\\n    --labels=$labels \\\n    --label_dtypes=$label_dtypes \\\n    --preprocess_in_tf=False\n\necho \"Model dir:\"\necho ${JOB_DIR}/model_dir"
  },
  {
    "path": "experiments/tf_trainer/tf_word_label_embedding/run.local.sh",
    "content": "#!/bin/bash\n\nsource \"tf_trainer/common/dataset_config.sh\"\n\npython -m tf_trainer.tf_word_label_embedding.run \\\n  --train_path=$train_path \\\n  --validate_path=$valid_path \\\n  --model_dir=\"tf_word_label_embedding_local_model_dir\" \\\n  --labels=$labels \\\n  --label_dtypes=$label_dtypes"
  },
  {
    "path": "experiments/tf_trainer/tf_word_label_embedding/run.ml_engine.sh",
    "content": "#!/bin/bash\n# This script runs one training job on Cloud MLE.\n\n# Note:\n# We currently use 2 different embeddings:\n# - glove.6B/glove.6B.300d.txt\n# - google-news/GoogleNews-vectors-negative300.txt\n# Glove assumes all words are lowercased, while Google-news handles different casing.\n# As there is currently no tf operation that performs lowercasing, we have the following\n# requirements:\n# - For google news: run preprocess_in_tf=True (no lowercasing).\n# - For glove.6B: run preprocess_in_tf=False (will force lowercasing).\n\nsource \"tf_trainer/common/dataset_config.sh\"\nDATETIME=$(date '+%Y%m%d_%H%M%S')\nMODEL_NAME=\"tf_word_label_embedding\"\nMODEL_NAME_DATA=\"${MODEL_NAME}_$1\"\nJOB_DIR=\"${MODEL_PARENT_DIR}/${USER}/${MODEL_NAME_DATA}/${DATETIME}\"\n\ngcloud ml-engine jobs submit training tf_trainer_${MODEL_NAME}_${USER}_${DATETIME} \\\n    --job-dir=${JOB_DIR} \\\n    --runtime-version=1.10 \\\n    --scale-tier 'BASIC_GPU' \\\n    --module-name=\"tf_trainer.${MODEL_NAME}.run\" \\\n    --package-path=tf_trainer \\\n    --python-version \"3.5\" \\\n    --region=us-east1 \\\n    --verbosity=debug \\\n    -- \\\n    --train_path=\"${GCS_RESOURCES}/toxicity_q42017_train.tfrecord\" \\\n    --validate_path=\"${GCS_RESOURCES}/toxicity_q42017_validate.tfrecord\" \\\n    --embeddings_path=\"${GCS_RESOURCES}/glove.6B/glove.6B.300d.txt\" \\\n    --embedding_size=300 \\\n    --preprocess_in_tf=False \\\n    --model_dir=\"${JOB_DIR}/model_dir\" \n"
  },
  {
    "path": "experiments/tf_trainer/tf_word_label_embedding/run.py",
    "content": "\"\"\"Experiments with Toxicity Dataset\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport nltk\nimport tensorflow as tf\n\nfrom tf_trainer.common import base_model\nfrom tf_trainer.common import model_trainer\nfrom tf_trainer.common import serving_input\nfrom tf_trainer.common import text_preprocessor\nfrom tf_trainer.common import tfrecord_input\nfrom tf_trainer.common import types\nfrom tf_trainer.tf_word_label_embedding import model as tf_word_label_embedding\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string('embeddings_path',\n                           'local_data/glove.6B/glove.6B.100d.txt',\n                           'Path to the embeddings file.')\n\n\ndef main(argv):\n  del argv  # unused\n\n  preprocessor = text_preprocessor.TextPreprocessor(FLAGS.embeddings_path)\n\n  nltk.download('punkt')\n  train_preprocess_fn = preprocessor.train_preprocess_fn(nltk.word_tokenize)\n  dataset = tfrecord_input.TFRecordInputWithTokenizer(\n      train_preprocess_fn=train_preprocess_fn, max_seq_len=5000)\n\n  model_tf = tf_word_label_embedding.TFWordLabelEmbeddingModel(dataset.labels())\n  model = preprocessor.add_embedding_to_model(model_tf,\n                                              base_model.TOKENS_FEATURE_KEY)\n\n  trainer = model_trainer.ModelTrainer(dataset, model)\n  trainer.train_with_eval()\n\n\nif __name__ == '__main__':\n  tf.logging.set_verbosity(tf.logging.INFO)\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tools/bert_tfrecord_converter.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"Converts our TFRecord data into the format expected by the BERT model.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport bert\nfrom bert import run_classifier\nimport collections\nimport numpy as np\nimport pandas as pd\nimport tensorflow as tf\nimport tensorflow_hub as hub\n\ntf.app.flags.DEFINE_string('input_data_path', None,\n                           'Path to the input TFRecord files.')\ntf.app.flags.DEFINE_string('output_data_path', None,\n                           'Path to write the output TFRecord files.')\ntf.app.flags.DEFINE_string('filenames', None,\n                           'Comma separated list of filenames.')\ntf.app.flags.DEFINE_string('text_key', 'comment_text',\n                           'tf.Example key for text field in input TFRecord.')\ntf.app.flags.DEFINE_string('label_key', 'toxicity',\n                           'tf.Example key for label field in input TFRecord.')\ntf.app.flags.DEFINE_string('bert_url', 'https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1', 'TF Hub URL for BERT Model')\ntf.app.flags.DEFINE_integer('max_sequence_length', 128,\n                            'Maximum sequence length of tokenized comment.')\n\nFLAGS = tf.app.flags.FLAGS\n\ndef create_int_feature(values):\n  f = 
tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))\n  return f\n\ndef create_tokenizer_from_hub_module(url):\n  \"\"\"Get the vocab file and casing info from the Hub module.\"\"\"\n  with tf.Graph().as_default():\n    bert_module = hub.Module(url)\n    tokenization_info = bert_module(signature=\"tokenization_info\", as_dict=True)\n    with tf.Session() as sess:\n      vocab_file, do_lower_case = sess.run([tokenization_info[\"vocab_file\"],\n                                            tokenization_info[\"do_lower_case\"]])\n\n  return bert.tokenization.FullTokenizer(\n      vocab_file=vocab_file, do_lower_case=do_lower_case)\n\n\ndef convert_tfrecord_for_bert(filenames,\n                              input_data_path,\n                              output_data_path,\n                              bert_tfhub_url,\n                              text_key,\n                              label_key,\n                              max_seq_length):\n  \"\"\"Converts input TFRecords into the format expected by the BERT model.\"\"\"\n  tokenizer = create_tokenizer_from_hub_module(bert_tfhub_url)\n  for filename in filenames:\n    print('Working on {}...'.format(filename))\n    in_filepath = '{}{}'.format(input_data_path, filename)\n    #TODO: Check if file exists, if not write new file\n    #TODO: Have the filename reflect the max_sequence_length and path reflect model\n    out_filepath = '{}{}'.format(output_data_path, filename)\n    record_iterator = tf.python_io.tf_record_iterator(path=in_filepath)\n    writer = tf.python_io.TFRecordWriter(out_filepath)\n    for ex_index, string_record in enumerate(record_iterator):\n      example = tf.train.Example()\n      example.ParseFromString(string_record)\n      text = example.features.feature[text_key].bytes_list.value[0]\n      label = example.features.feature[label_key].float_list.value[0]\n      label = round(label)\n      ex = run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping\n         
                             text_a = text, \n                                      text_b = None, \n                                      label = label)\n      label_list = [0, 1]\n      feature = run_classifier.convert_single_example(ex_index, ex, label_list,\n                                                      max_seq_length, tokenizer)\n      features = collections.OrderedDict()\n      features[\"input_ids\"] = create_int_feature(feature.input_ids)\n      features[\"input_mask\"] = create_int_feature(feature.input_mask)\n      features[\"segment_ids\"] = create_int_feature(feature.segment_ids)\n      features[\"label_ids\"] = create_int_feature([feature.label_id])\n      features[\"is_real_example\"] = create_int_feature(\n          [int(feature.is_real_example)])\n\n      tf_example = tf.train.Example(features=tf.train.Features(feature=features))\n      writer.write(tf_example.SerializeToString())\n    writer.close()\n    print('... Done!')\n\nif __name__ == '__main__':\n  filenames = [name.strip() for name in FLAGS.filenames.split(',')]\n  convert_tfrecord_for_bert(filenames,\n                            FLAGS.input_data_path,\n                            FLAGS.output_data_path,\n                            FLAGS.bert_url,\n                            FLAGS.text_key,\n                            FLAGS.label_key,\n                            FLAGS.max_sequence_length)"
  },
  {
    "path": "experiments/tools/convert_csv_to_tfrecord.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"A function to convert csvs to TFRecords.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport pandas as pd\nimport tensorflow as tf\n\nFLAGS = tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string(\"input_csv_path\", None,\n                           \"Path to the input csv file.\")\ntf.app.flags.DEFINE_string(\"output_tfrecord_path\", None,\n                           \"Path where the output TFRecord should be written.\")\ntf.app.flags.DEFINE_string(\"column_list\", None, \n                           \"Comma seperated list of column names.\")\ntf.app.flags.DEFINE_string(\"dtype_list\", None, \n                           \"Comma seperated list of column dtypes. 
\"\n                           \"Each entry should be one of [bytes,str,float,int]).\")\n\n\ndef convert_csv_to_tfrecord(input_csv_path,\n                            output_tfrecord_path,\n                            column_names,\n                            column_dtypes):\n  df = pd.read_csv(tf.gfile.Open(input_csv_path))\n  with tf.python_io.TFRecordWriter(output_tfrecord_path) as writer:\n    for row in df.iterrows():\n      row = row[1]\n      example = tf.train.Example()\n      for col_name,dtype in zip(column_names,column_dtypes):\n        col_val = row[col_name]\n        if dtype == 'bytes':\n          example.features.feature[col_name].bytes_list.value.append(\n              col_val)\n        elif dtype == 'str':\n          example.features.feature[col_name].bytes_list.value.append(\n              str(col_val).encode(\"utf-8\", errors=\"replace\"))\n        elif dtype == 'float':\n          example.features.feature[col_name].float_list.value.append(col_val)\n        elif dtype == 'int':\n          example.features.feature[col_name].int64_list.value.append(col_val)\n        else:\n          raise ValueError('dtype must be one of bytes, str, float, int.')\n      writer.write(example.SerializeToString())\n\n\ndef main(argv):\n  del argv  # unused\n\n  input_csv_path = FLAGS.input_csv_path\n  output_tfrecord_path = FLAGS.output_tfrecord_path\n  column_names = FLAGS.column_list.split(',')\n  column_dtypes = FLAGS.dtype_list.split(',')\n  assert len(column_names) == len(column_dtypes)\n\n  convert_csv_to_tfrecord(input_csv_path, \n                          output_tfrecord_path,\n                          column_names,\n                          column_dtypes)\n\n\nif __name__ == \"__main__\":\n  tf.app.run(main)\n"
  },
  {
    "path": "experiments/tools/convert_jsonl_to_tfrecord.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Conversation-AI.github.io Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the 'License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an 'AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"A function to convert jsonlines to TFRecords.\n\npython tools/convert_jsonl_to_tfrecord.py \\\n --input_jsonlines_path=tf_trainer/common/testdata/cats_and_dogs.jsonl \\\n --text_fields_re='^(text)$' \\\n --label_fields_re='^(bad)$' \\\n --output_tfrecord_path=local_data/testdata/cats_and_dogs.recordio\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom absl import flags\nfrom absl import app\nfrom absl import logging\nimport json\nimport jsonlines\nimport tensorflow as tf\nimport re\n\nFLAGS = flags.FLAGS\n\n# TODO: Compute basic stats for text fields and labels.\nflags.DEFINE_string('text_fields_re', None,\n                    'Matcher for names of the text fields.')\nflags.register_validator(\n    'text_fields_re',\n    lambda value: isinstance(value, str) and re.compile(value),\n    message='--text_fields_re must be a regexp string.')\n\nflags.DEFINE_string('label_fields_re', None,\n                    'Matcher for names of the label fields.')\nflags.register_validator(\n    'label_fields_re',\n    lambda value: isinstance(value, str) and re.compile(value),\n    message='--label_fields_re must be a regexp string.')\n\nflags.DEFINE_string('input_jsonlines_path', None,\n                    'Path to the JSON-lines input 
file.')\nflags.register_validator(\n    'input_jsonlines_path',\n    lambda value: isinstance(value, str),\n    message='--input_jsonlines_path must be a string.')\n\nflags.DEFINE_string('output_tfrecord_path', None,\n                    'Path where the output TFRecord should be written.')\nflags.register_validator(\n    'output_tfrecord_path',\n    lambda value: isinstance(value, str),\n    message='--output_tfrecord_path must be a string.')\n\n\nclass MisingAllTextFieldsError(Exception):\n  pass\n\n\nclass FieldsCounter():\n\n  def __init__(self):\n    self.counters = {}\n\n  def inc_field(self, field_name: str):\n    if field_name not in self.counters:\n      self.counters[field_name] = 0\n    self.counters[field_name] += 1\n\n\ndef make_selected_output_row(row, line, counters):\n  \"\"\"Create an output row with just the fields matching --text_fields_re and\n\n  --label_fields_re. If there is no matching field in the row for\n  --text_fields_re then raise MisingAllTextFieldsError.\n  \"\"\"\n  text_field_matcher = re.compile(FLAGS.text_fields_re)\n  label_field_matcher = re.compile(FLAGS.label_fields_re)\n  has_text_field = False\n  output_row = {}\n  for key, value in row.items():\n    if text_field_matcher.match(key):\n      has_text_field = True\n      counters.inc_field(key)\n      output_row[key] = value\n    elif label_field_matcher.match(key):\n      counters.inc_field(key)\n      output_row[key] = value\n  if not has_text_field:\n    raise MisingAllTextFieldsError(\n        f'Error parsing file {FLAGS.input_jsonlines_path} at line: {line}.\\n'\n        f'No field matched by --text_field_regexp=\"{FLAGS.text_fields_re}\":\\n'\n        f'  {json.dumps(row, sort_keys=True, indent=2)}')\n  return output_row\n\n\ndef itr_as_dict(input_jsonlines_path):\n  with tf.gfile.Open(input_jsonlines_path) as f:\n    counters = FieldsCounter()\n    line = 0\n    for row in jsonlines.Reader(f):\n      line += 1\n      yield make_selected_output_row(row, line, counters)\n 
   logging.info(f'Complete.\\nField Counts:\\n'\n                 f'{json.dumps(counters.counters, sort_keys=True, indent=2)}')\n\n\ndef itr_as_tfrecord(input_jsonlines_path):\n  for row in itr_as_dict(input_jsonlines_path):\n    example = tf.train.Example()\n    for key, value in row.items():\n      if isinstance(value, str):\n        example.features.feature[key].bytes_list.value.append(\n            value.encode('utf-8', errors='replace'))\n      elif isinstance(value, float) or isinstance(value, int):\n        example.features.feature[key].float_list.value.append(value)\n    yield example\n\n\ndef convert_to_tfrecord(input_jsonlines_path, output_tfrecord_path):\n  with tf.python_io.TFRecordWriter(output_tfrecord_path) as writer:\n    for example in itr_as_tfrecord(input_jsonlines_path):\n      writer.write(example.SerializeToString())\n\n\ndef main(argv):\n  del argv  # unused\n  convert_to_tfrecord(FLAGS.input_jsonlines_path, FLAGS.output_tfrecord_path)\n\n\nif __name__ == '__main__':\n  app.run(main)\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/.gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*,cover\n.hypothesis/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# IPython Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# dotenv\n.env\n\n# virtualenv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n\n# Rope project settings\n.ropeproject\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/HAN_model.py",
    "content": "import tensorflow as tf\nimport tensorflow.contrib.layers as layers\nimport numpy as np\nimport data_util\nfrom model_components import task_specific_attention, bidirectional_rnn\n\n\nclass HANClassifierModel():\n  \"\"\" Implementation of document classification model described in\n\n    `Hierarchical Attention Networks for Document Classification (Yang et al.,\n    2016)`\n    (https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf)\n  \"\"\"\n\n  def __init__(self,\n               vocab_size,\n               embedding_size,\n               classes,\n               fw_word_cell,\n               bw_word_cell,\n               fw_sentence_cell,\n               bw_sentence_cell,\n               word_output_size,\n               sentence_output_size,\n               max_grad_norm,\n               dropout_keep_proba,\n               is_training=None,\n               learning_rate=1e-4,\n               device='/cpu:0',\n               scope=None):\n    self.vocab_size = vocab_size\n    self.embedding_size = embedding_size\n    self.classes = classes\n    self.fw_word_cell = fw_word_cell\n    self.bw_word_cell = bw_word_cell\n    self.word_output_size = word_output_size\n    self.fw_sentence_cell = fw_sentence_cell\n    self.bw_sentence_cell = bw_sentence_cell\n    self.sentence_output_size = sentence_output_size\n    self.max_grad_norm = max_grad_norm\n    self.dropout_keep_proba = dropout_keep_proba\n\n    with tf.variable_scope(scope or 'tcm') as scope:\n      self.global_step = tf.Variable(0, name='global_step', trainable=False)\n\n      if is_training is not None:\n        self.is_training = is_training\n      else:\n        self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')\n\n      self.sample_weights = tf.placeholder(\n          shape=(None,), dtype=tf.float32, name='sample_weights')\n\n      # [document x sentence x word]\n      self.inputs = tf.placeholder(\n          shape=(None, None, None), dtype=tf.int32, name='inputs')\n\n      # 
[document x sentence]\n      self.word_lengths = tf.placeholder(\n          shape=(None, None), dtype=tf.int32, name='word_lengths')\n\n      # [document]\n      self.sentence_lengths = tf.placeholder(\n          shape=(None,), dtype=tf.int32, name='sentence_lengths')\n\n      # [document]\n      self.labels = tf.placeholder(shape=(None,), dtype=tf.int32, name='labels')\n\n      (self.document_size, self.sentence_size, self.word_size) = tf.unstack(\n          tf.shape(self.inputs))\n\n      self._init_embedding(scope)\n\n      # embeddings cannot be placed on GPU\n      with tf.device(device):\n        self._init_body(scope)\n\n    with tf.variable_scope('train'):\n      self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(\n          labels=self.labels, logits=self.logits)\n\n      self.loss = tf.reduce_mean(\n          tf.multiply(self.cross_entropy, self.sample_weights))\n      tf.summary.scalar('loss', self.loss)\n\n      self.accuracy = tf.reduce_mean(\n          tf.cast(tf.nn.in_top_k(self.logits, self.labels, 1), tf.float32))\n      tf.summary.scalar('accuracy', self.accuracy)\n\n      tvars = tf.trainable_variables()\n\n      grads, global_norm = tf.clip_by_global_norm(\n          tf.gradients(self.loss, tvars), self.max_grad_norm)\n      tf.summary.scalar('global_grad_norm', global_norm)\n\n      opt = tf.train.AdamOptimizer(learning_rate)\n\n      self.train_op = opt.apply_gradients(\n          zip(grads, tvars), name='train_op', global_step=self.global_step)\n\n      self.summary_op = tf.summary.merge_all()\n\n  def _init_embedding(self, scope):\n    with tf.variable_scope(scope):\n      with tf.variable_scope('embedding') as scope:\n        self.embedding_matrix = tf.get_variable(\n            name='embedding_matrix',\n            shape=[self.vocab_size, self.embedding_size],\n            initializer=layers.xavier_initializer(),\n            dtype=tf.float32)\n        self.inputs_embedded = tf.nn.embedding_lookup(self.embedding_matrix,\n  
                                                    self.inputs)\n\n  def _init_body(self, scope):\n    with tf.variable_scope(scope):\n\n      word_level_inputs = tf.reshape(self.inputs_embedded, [\n          self.document_size * self.sentence_size, self.word_size,\n          self.embedding_size\n      ])\n      word_level_lengths = tf.reshape(self.word_lengths,\n                                      [self.document_size * self.sentence_size])\n\n      with tf.variable_scope('word') as scope:\n        word_encoder_output, _ = bidirectional_rnn(\n            self.fw_word_cell,\n            self.bw_word_cell,\n            word_level_inputs,\n            word_level_lengths,\n            scope=scope)\n\n        with tf.variable_scope('attention') as scope:\n          word_level_output = task_specific_attention(\n              word_encoder_output, self.word_output_size, scope=scope)\n\n        with tf.variable_scope('dropout'):\n          word_level_output = layers.dropout(\n              word_level_output,\n              keep_prob=self.dropout_keep_proba,\n              is_training=self.is_training,\n          )\n\n      # sentence_level\n\n      sentence_inputs = tf.reshape(\n          word_level_output,\n          [self.document_size, self.sentence_size, self.word_output_size])\n\n      with tf.variable_scope('sentence') as scope:\n        sentence_encoder_output, _ = bidirectional_rnn(\n            self.fw_sentence_cell,\n            self.bw_sentence_cell,\n            sentence_inputs,\n            self.sentence_lengths,\n            scope=scope)\n\n        with tf.variable_scope('attention') as scope:\n          sentence_level_output = task_specific_attention(\n              sentence_encoder_output, self.sentence_output_size, scope=scope)\n\n        with tf.variable_scope('dropout'):\n          sentence_level_output = layers.dropout(\n              sentence_level_output,\n              keep_prob=self.dropout_keep_proba,\n              
is_training=self.is_training,\n          )\n\n      with tf.variable_scope('classifier'):\n        self.logits = layers.fully_connected(\n            sentence_level_output, self.classes, activation_fn=None)\n\n        self.prediction = tf.argmax(self.logits, axis=-1)\n\n  def get_feed_data(self, x, y=None, class_weights=None, is_training=True):\n    x_m, doc_sizes, sent_sizes = data_util.batch(x)\n    fd = {\n        self.inputs: x_m,\n        self.sentence_lengths: doc_sizes,\n        self.word_lengths: sent_sizes,\n    }\n    if y is not None:\n      fd[self.labels] = y\n      if class_weights is not None:\n        fd[self.sample_weights] = [class_weights[yy] for yy in y]\n      else:\n        fd[self.sample_weights] = np.ones(shape=[len(x_m)], dtype=np.float32)\n    fd[self.is_training] = is_training\n    return fd\n\n\nif __name__ == '__main__':\n  try:\n    from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple, GRUCell\n  except ImportError:\n    LSTMCell = tf.nn.rnn_cell.LSTMCell\n    LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple\n    GRUCell = tf.nn.rnn_cell.GRUCell\n\n  tf.reset_default_graph()\n  with tf.Session() as session:\n    model = HANClassifierModel(\n        vocab_size=10,\n        embedding_size=5,\n        classes=2,\n        fw_word_cell=GRUCell(10),\n        bw_word_cell=GRUCell(10),\n        fw_sentence_cell=GRUCell(10),\n        bw_sentence_cell=GRUCell(10),\n        word_output_size=10,\n        sentence_output_size=10,\n        max_grad_norm=5.0,\n        dropout_keep_proba=0.5,\n    )\n    session.run(tf.global_variables_initializer())\n\n    fd = {\n        model.is_training: False,\n        model.inputs: [[[5, 4, 1, 0], [3, 3, 6, 7], [6, 7, 0, 0]],\n                       [[2, 2, 1, 0], [3, 3, 6, 7], [0, 0, 0, 0]]],\n        model.word_lengths: [\n            [3, 4, 2],\n            [3, 4, 0],\n        ],\n        model.sentence_lengths: [3, 2],\n        model.labels: [0, 1],\n    }\n\n    print(session.run(model.logits, fd))\n   
 session.run(model.train_op, fd)\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/LICENSE",
    "content": "MIT License\n\nCopyright (c) 2017 Matvey Ezhov\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/README.md",
    "content": "# Deep Text Classifier\n\nImplementation of document classification model described in [Hierarchical Attention Networks for Document Classification (Yang et al., 2016)](https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf).\n\n## How to run\n\n1. Create a virtual environment, activate it, and install requirements:\n```\npython3 -m venv env\nsource env/bin/activate\npip install -r requirements.txt\n```\n\n2. Download the English model for spaCy:\n\n```\npython -m spacy download en\n```\n\n3. Get [Yelp review dataset](https://www.yelp.com/dataset_challenge) and extract it in this directory.\n```\npython3 yelp_prepare.py dataset/review.json\npython3 worker.py --mode=train --device=/gpu:0 --batch-size=30\n```\n\n## Results\nI am getting 65% accuracy on a dev set (16% of data) after 3 epochs. Results reported in the paper are 71% on Yelp'15.\nNo systemic hyperparameter optimization was performed."
  },
  {
    "path": "hierarchical_attention_research/han_model/bn_lstm.py",
    "content": "# borrowed from https://github.com/OlavHN/bnlstm, updated for r1.0\n\nimport math\nimport numpy as np\nimport tensorflow as tf\n\ntry:\n  from tensorflow.contrib.rnn import RNNCell\nexcept ImportError:\n  RNNCell = tf.nn.rnn_cell.RNNCel\n\n\nclass LSTMCell(RNNCell):\n  \"\"\"Vanilla LSTM implemented with same initializations as BN-LSTM\"\"\"\n\n  def __init__(self, num_units):\n    self.num_units = num_units\n\n  @property\n  def state_size(self):\n    return (self.num_units, self.num_units)\n\n  @property\n  def output_size(self):\n    return self.num_units\n\n  def __call__(self, x, state, scope=None):\n    with tf.variable_scope(scope or type(self).__name__):\n      c, h = state\n\n      # Keep W_xh and W_hh separate here as well to reuse initialization methods\n      x_size = x.get_shape().as_list()[1]\n      W_xh = tf.get_variable(\n          'W_xh', [x_size, 4 * self.num_units],\n          initializer=orthogonal_initializer())\n      W_hh = tf.get_variable(\n          'W_hh', [self.num_units, 4 * self.num_units],\n          initializer=bn_lstm_identity_initializer(0.95))\n      bias = tf.get_variable('bias', [4 * self.num_units])\n\n      # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias\n      # improve speed by concat.\n      concat = tf.concat([x, h], 1)\n      W_both = tf.concat([W_xh, W_hh], 0)\n      hidden = tf.matmul(concat, W_both) + bias\n\n      i, j, f, o = tf.split(hidden, 4, axis=1)\n\n      new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)\n      new_h = tf.tanh(new_c) * tf.sigmoid(o)\n\n      return new_h, (new_c, new_h)\n\n\nclass BNLSTMCell(RNNCell):\n  \"\"\"Batch normalized LSTM as described in http://arxiv.org/abs/1603.09025\"\"\"\n\n  def __init__(self, num_units, training):\n    self.num_units = num_units\n    self.training = training\n\n  @property\n  def state_size(self):\n    return (self.num_units, self.num_units)\n\n  @property\n  def output_size(self):\n    return self.num_units\n\n  def __call__(self, 
x, state, scope=None):\n    with tf.variable_scope(scope or 'bn_lstm'):\n      c, h = state\n\n      x_size = x.get_shape().as_list()[1]\n      W_xh = tf.get_variable(\n          'W_xh', [x_size, 4 * self.num_units],\n          initializer=orthogonal_initializer())\n      W_hh = tf.get_variable(\n          'W_hh', [self.num_units, 4 * self.num_units],\n          initializer=bn_lstm_identity_initializer(0.95))\n      bias = tf.get_variable('bias', [4 * self.num_units])\n\n      xh = tf.matmul(x, W_xh)\n      hh = tf.matmul(h, W_hh)\n\n      bn_xh = batch_norm(xh, 'xh', self.training)\n      bn_hh = batch_norm(hh, 'hh', self.training)\n\n      hidden = bn_xh + bn_hh + bias\n\n      i, j, f, o = tf.split(hidden, 4, axis=1)\n\n      new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)\n      bn_new_c = batch_norm(new_c, 'c', self.training)\n\n      new_h = tf.tanh(bn_new_c) * tf.sigmoid(o)\n\n      return new_h, (new_c, new_h)\n\n\ndef orthogonal(shape):\n  flat_shape = (shape[0], np.prod(shape[1:]))\n  a = np.random.normal(0.0, 1.0, flat_shape)\n  u, _, v = np.linalg.svd(a, full_matrices=False)\n  q = u if u.shape == flat_shape else v\n  return q.reshape(shape)\n\n\ndef bn_lstm_identity_initializer(scale):\n\n  def _initializer(shape, dtype=tf.float32, partition_info=None):\n    \"\"\"Ugly cause LSTM params calculated in one matrix multiply\"\"\"\n    size = shape[0]\n    # gate (j) is identity\n    t = np.zeros(shape)\n    t[:, size:size * 2] = np.identity(size) * scale\n    t[:, :size] = orthogonal([size, size])\n    t[:, size * 2:size * 3] = orthogonal([size, size])\n    t[:, size * 3:] = orthogonal([size, size])\n    return tf.constant(t, dtype=dtype)\n\n  return _initializer\n\n\ndef orthogonal_initializer():\n\n  def _initializer(shape, dtype=tf.float32, partition_info=None):\n    return tf.constant(orthogonal(shape), dtype)\n\n  return _initializer\n\n\ndef batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999):\n  \"\"\"Assume 2d [batch, values] 
tensor\"\"\"\n\n  with tf.variable_scope(name_scope):\n    size = x.get_shape().as_list()[1]\n\n    scale = tf.get_variable(\n        'scale', [size], initializer=tf.constant_initializer(0.1))\n    offset = tf.get_variable('offset', [size])\n\n    pop_mean = tf.get_variable(\n        'pop_mean', [size], initializer=tf.zeros_initializer(), trainable=False)\n    pop_var = tf.get_variable(\n        'pop_var', [size], initializer=tf.ones_initializer(), trainable=False)\n    batch_mean, batch_var = tf.nn.moments(x, [0])\n\n    train_mean_op = tf.assign(pop_mean,\n                              pop_mean * decay + batch_mean * (1 - decay))\n    train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))\n\n    def batch_statistics():\n      with tf.control_dependencies([train_mean_op, train_var_op]):\n        return tf.nn.batch_normalization(x, batch_mean, batch_var, offset,\n                                         scale, epsilon)\n\n    def population_statistics():\n      return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale,\n                                       epsilon)\n\n    return tf.cond(training, batch_statistics, population_statistics)\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/bn_lstm_test.py",
    "content": "import time\nimport uuid\nimport os\nimport numpy as np\nimport tensorflow as tf\nfrom tensorflow.python.ops.rnn import dynamic_rnn\nfrom bn_lstm import LSTMCell, BNLSTMCell, orthogonal_initializer\nfrom tensorflow.examples.tutorials.mnist import input_data\n\nbatch_size = 100\nhidden_size = 100\n\nmnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)\n\nx = tf.placeholder(tf.float32, [None, 784])\ntraining = tf.placeholder(tf.bool)\n\nx_inp = tf.expand_dims(x, -1)\nlstm = BNLSTMCell(hidden_size, training) #LSTMCell(hidden_size)\n\n#c, h\ninitialState = (\n    tf.random_normal([batch_size, hidden_size], stddev=0.1),\n    tf.random_normal([batch_size, hidden_size], stddev=0.1))\n\noutputs, state = dynamic_rnn(lstm, x_inp, initial_state=initialState, dtype=tf.float32)\n\n_, final_hidden = state\n\nW = tf.get_variable('W', [hidden_size, 10], initializer=orthogonal_initializer())\nb = tf.get_variable('b', [10])\n\ny = tf.nn.softmax(tf.matmul(final_hidden, W) + b)\n\ny_ = tf.placeholder(tf.float32, [None, 10])\n\ncross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))\n\noptimizer = tf.train.AdamOptimizer()\ngvs = optimizer.compute_gradients(cross_entropy)\ncapped_gvs = [(None if grad is None else tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]\ntrain_step = optimizer.apply_gradients(capped_gvs)\n\ncorrect_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))\naccuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n\n# Summaries\ntf.summary.scalar(\"accuracy\", accuracy)\ntf.summary.scalar(\"xe_loss\", cross_entropy)\nfor (grad, var), (capped_grad, _) in zip(gvs, capped_gvs):\n    if grad is not None:\n        tf.summary.histogram('grad/{}'.format(var.name), capped_grad)\n        tf.summary.histogram('capped_fraction/{}'.format(var.name),\n            tf.nn.zero_fraction(grad - capped_grad))\n        tf.summary.histogram('weight/{}'.format(var.name), var)\n\nmerged = 
tf.merge_all_summaries()\n\ninit = tf.initialize_all_variables()\n\nsess = tf.Session()\nsess.run(init)\n\nlogdir = 'logs/' + str(uuid.uuid4())\nos.makedirs(logdir)\nprint('logging to ' + logdir)\nwriter = tf.summary.trainWriter(logdir, sess.graph)\n\ncurrent_time = time.time()\nprint(\"Using population statistics (training: False) at test time gives worse results than batch statistics\")\n\nfor i in range(100000):\n    batch_xs, batch_ys = mnist.train.next_batch(batch_size)\n    loss, _ = sess.run([cross_entropy, train_step], feed_dict={x: batch_xs, y_: batch_ys, training: True})\n    step_time = time.time() - current_time\n    current_time = time.time()\n    if i % 100 == 0:\n        batch_xs, batch_ys = mnist.validation.next_batch(batch_size)\nsummary.    _str = sess.run(merged, feed_dict={x: batch_xs, y_: batch_ys, training: False})\n        writer.summary.add_str, i)\n    print(loss, step_time)\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/data_util.py",
    "content": "import numpy as np\n\n\ndef batch(inputs):\n  batch_size = len(inputs)\n\n  document_sizes = np.array([len(doc) for doc in inputs], dtype=np.int32)\n  document_size = document_sizes.max()\n\n  sentence_sizes_ = [[len(sent) for sent in doc] for doc in inputs]\n  sentence_size = max(map(max, sentence_sizes_))\n\n  b = np.zeros(\n      shape=[batch_size, document_size, sentence_size],\n      dtype=np.int32)  # == PAD\n\n  sentence_sizes = np.zeros(shape=[batch_size, document_size], dtype=np.int32)\n  for i, document in enumerate(inputs):\n    for j, sentence in enumerate(document):\n      sentence_sizes[i, j] = sentence_sizes_[i][j]\n      for k, word in enumerate(sentence):\n        b[i, j, k] = word\n\n  return b, document_sizes, sentence_sizes\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/model_components.py",
    "content": "import tensorflow as tf\nimport tensorflow.contrib.layers as layers\n\ntry:\n  from tensorflow.contrib.rnn import LSTMStateTuple\nexcept ImportError:\n  LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple\n\n\ndef bidirectional_rnn(cell_fw,\n                      cell_bw,\n                      inputs_embedded,\n                      input_lengths,\n                      scope=None):\n  \"\"\"Bidirecional RNN with concatenated outputs and states\"\"\"\n  with tf.variable_scope(scope or 'birnn') as scope:\n    ((fw_outputs, bw_outputs), (fw_state, bw_state)) = (\n        tf.nn.bidirectional_dynamic_rnn(\n            cell_fw=cell_fw,\n            cell_bw=cell_bw,\n            inputs=inputs_embedded,\n            sequence_length=input_lengths,\n            dtype=tf.float32,\n            swap_memory=True,\n            scope=scope))\n    outputs = tf.concat((fw_outputs, bw_outputs), 2)\n\n    def concatenate_state(fw_state, bw_state):\n      if isinstance(fw_state, LSTMStateTuple):\n        state_c = tf.concat((fw_state.c, bw_state.c),\n                            1,\n                            name='bidirectional_concat_c')\n        state_h = tf.concat((fw_state.h, bw_state.h),\n                            1,\n                            name='bidirectional_concat_h')\n        state = LSTMStateTuple(c=state_c, h=state_h)\n        return state\n      elif isinstance(fw_state, tf.Tensor):\n        state = tf.concat((fw_state, bw_state), 1, name='bidirectional_concat')\n        return state\n      elif (isinstance(fw_state, tuple) and isinstance(bw_state, tuple) and\n            len(fw_state) == len(bw_state)):\n        # multilayer\n        state = tuple(\n            concatenate_state(fw, bw) for fw, bw in zip(fw_state, bw_state))\n        return state\n\n      else:\n        raise ValueError('unknown state type: {}'.format((fw_state, bw_state)))\n\n    state = concatenate_state(fw_state, bw_state)\n    return outputs, state\n\n\ndef 
task_specific_attention(inputs,\n                            output_size,\n                            initializer=layers.xavier_initializer(),\n                            activation_fn=tf.tanh,\n                            scope=None):\n  \"\"\"\n    Performs task-specific attention reduction, using learned\n    attention context vector (constant within task of interest).\n\n    Args:\n        inputs: Tensor of shape [batch_size, units, input_size] `input_size`\n          must be static (known) `units` axis will be attended over (reduced\n          from output) `batch_size` will be preserved\n        output_size: Size of output's inner (feature) dimension\n\n    Returns:\n        outputs: Tensor of shape [batch_size, output_dim].\n    \"\"\"\n  assert len(\n      inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None\n\n  with tf.variable_scope(scope or 'attention') as scope:\n    attention_context_vector = tf.get_variable(\n        name='attention_context_vector',\n        shape=[output_size],\n        initializer=initializer,\n        dtype=tf.float32)\n    input_projection = layers.fully_connected(\n        inputs, output_size, activation_fn=activation_fn, scope=scope)\n\n    vector_attn = tf.reduce_sum(\n        tf.multiply(input_projection, attention_context_vector),\n        axis=2,\n        keep_dims=True)\n    attention_weights = tf.nn.softmax(vector_attn, dim=1)\n    weighted_projection = tf.multiply(input_projection, attention_weights)\n\n    outputs = tf.reduce_sum(weighted_projection, axis=1)\n\n    return outputs\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/requirements.txt",
    "content": "cymem==1.31.2\ncytoolz==0.8.2\ndill==0.2.7.1\nen-core-web-sm==2.0.0\nmsgpack-numpy==0.4.1\nmsgpack-python==0.5.6\nmurmurhash==0.28.0\nnumpy==1.22.0\npathlib==1.0.1\nplac==0.9.6\npreshed==1.0.0\nregex==2017.4.5\nsix==1.11.0\nspacy==2.0.11\ntermcolor==1.1.0\nthinc==6.10.2\ntoolz==0.9.0\ntqdm==4.22.0\nujson==5.4.0\nwrapt==1.10.11\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/worker.py",
    "content": "#!/usr/bin/env python3\nimport argparse\nparser = argparse.ArgumentParser()\nparser.add_argument('--task', default='yelp', choices=['yelp'])\nparser.add_argument('--mode', default='train', choices=['train', 'eval'])\nparser.add_argument('--checkpoint-frequency', type=int, default=100)\nparser.add_argument('--eval-frequency', type=int, default=10000)\nparser.add_argument('--batch-size', type=int, default=30)\nparser.add_argument('--device', default='/cpu:0')\nparser.add_argument('--max-grad-norm', type=float, default=5.0)\nparser.add_argument('--lr', type=float, default=0.001)\nargs = parser.parse_args()\n\nimport importlib\nimport os\nimport pickle\nimport random\nimport time\nfrom collections import Counter, defaultdict\n\nimport numpy as np\nimport pandas as pd\nimport spacy\nimport tensorflow as tf\nfrom tensorflow.contrib.tensorboard.plugins import projector\nfrom tqdm import tqdm\n\nimport ujson\nfrom data_util import batch\n\ntask_name = args.task\n\ntask = importlib.import_module(task_name)\n\ncheckpoint_dir = os.path.join(task.train_dir, 'checkpoint')\ntflog_dir = os.path.join(task.train_dir, 'tflog')\ncheckpoint_name = task_name + '-model'\ncheckpoint_dir = os.path.join(task.train_dir, 'checkpoints')\ncheckpoint_path = os.path.join(checkpoint_dir, checkpoint_name)\n\n# @TODO: move calculation into `task file`\ntrainset = task.read_trainset(epochs=1)\nclass_weights = pd.Series(Counter([l for _, l in trainset]))\nclass_weights = 1 / (class_weights / class_weights.mean())\nclass_weights = class_weights.to_dict()\n\nvocab = task.read_vocab()\nlabels = task.read_labels()\n\nclasses = max(labels.values()) + 1\nvocab_size = task.vocab_size\n\nlabels_rev = {int(v): k for k, v in labels.items()}\nvocab_rev = {int(v): k for k, v in vocab.items()}\n\nNUM_RNN_LAYERS = 5\n\n\ndef HAN_model_1(session, restore_only=False):\n  \"\"\"Hierarhical Attention Network\"\"\"\n  import tensorflow as tf\n  try:\n    from tensorflow.contrib.rnn import GRUCell, 
MultiRNNCell, DropoutWrapper\n  except ImportError:\n    MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell\n    GRUCell = tf.nn.rnn_cell.GRUCell\n  from bn_lstm import BNLSTMCell\n  from HAN_model import HANClassifierModel\n\n  is_training = tf.placeholder(dtype=tf.bool, name='is_training')\n\n  def bn_cell():\n    return BNLSTMCell(80, is_training)  # h-h batchnorm LSTMCell\n\n  # cell = GRUCell(30)\n  fw_word_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)])\n  bw_word_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)])\n  fw_sentence_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)])\n  bw_sentence_cell = MultiRNNCell([bn_cell() for _ in range(NUM_RNN_LAYERS)])\n\n  model = HANClassifierModel(\n      vocab_size=vocab_size,\n      embedding_size=200,\n      classes=classes,\n      fw_word_cell=fw_word_cell,\n      bw_word_cell=bw_word_cell,\n      fw_sentence_cell=fw_sentence_cell,\n      bw_sentence_cell=bw_sentence_cell,\n      word_output_size=100,\n      sentence_output_size=100,\n      device=args.device,\n      learning_rate=args.lr,\n      max_grad_norm=args.max_grad_norm,\n      dropout_keep_proba=0.5,\n      is_training=is_training,\n  )\n\n  saver = tf.train.Saver(tf.global_variables())\n  checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)\n  if checkpoint:\n    print('Reading model parameters from %s' % checkpoint.model_checkpoint_path)\n    saver.restore(session, checkpoint.model_checkpoint_path)\n  elif restore_only:\n    raise FileNotFoundError('Cannot restore model')\n  else:\n    print('Created model with fresh parameters')\n    session.run(tf.global_variables_initializer())\n  # tf.get_default_graph().finalize()\n  return model, saver\n\n\nmodel_fn = HAN_model_1\n\n\ndef decode(ex):\n  print('text: ' + '\\n'.join(\n      [' '.join([vocab_rev.get(wid, '<?>')\n                 for wid in sent])\n       for sent in ex[0]]))\n  print('label: ', labels_rev[ex[1]])\n\n\nprint('data loaded')\n\n\ndef 
batch_iterator(dataset, batch_size, max_epochs):\n  for i in range(max_epochs):\n    xb = []\n    yb = []\n    for ex in dataset:\n      x, y = ex\n      xb.append(x)\n      yb.append(y)\n      if len(xb) == batch_size:\n        yield xb, yb\n        xb, yb = [], []\n\n\ndef ev(session, model, dataset):\n  predictions = []\n  labels = []\n  examples = []\n  for x, y in tqdm(batch_iterator(dataset, args.batch_size, 1)):\n    examples.extend(x)\n    labels.extend(y)\n    predictions.extend(\n        session.run(model.prediction, model.get_feed_data(x,\n                                                          is_training=False)))\n\n  df = pd.DataFrame({\n      'predictions': predictions,\n      'labels': labels,\n      'examples': examples\n  })\n  return df\n\n\ndef evaluate(dataset):\n  tf.reset_default_graph()\n  config = tf.ConfigProto(allow_soft_placement=True)\n  with tf.Session(config=config) as s:\n    model, _ = model_fn(s, restore_only=True)\n    df = ev(s, model, dataset)\n  print((df['predictions'] == df['labels']).mean())\n  import IPython\n  IPython.embed()\n\n\ndef train():\n  tf.reset_default_graph()\n\n  config = tf.ConfigProto(allow_soft_placement=True)\n\n  with tf.Session(config=config) as s:\n    model, saver = model_fn(s)\n    summary_writer = tf.summary.FileWriter(\n        tflog_dir, graph=tf.get_default_graph())\n\n    # Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto\n    # pconf = projector.ProjectorConfig()\n\n    # # You can add multiple embeddings. Here we add only one.\n    # embedding = pconf.embeddings.add()\n    # embedding.tensor_name = m.embedding_matrix.name\n\n    # # Link this tensor to its metadata file (e.g. 
labels).\n    # embedding.metadata_path = vocab_tsv\n\n    # print(embedding.tensor_name)\n\n    # Saves a configuration file that TensorBoard will read during startup.\n\n    for i, (x, y) in enumerate(\n        batch_iterator(task.read_trainset(epochs=3), args.batch_size, 300)):\n      fd = model.get_feed_data(x, y, class_weights=class_weights)\n\n      # import IPython\n      # IPython.embed()\n\n      t0 = time.clock()\n      step, summaries, loss, accuracy, _ = s.run([\n          model.global_step,\n          model.summary_op,\n          model.loss,\n          model.accuracy,\n          model.train_op,\n      ], fd)\n      td = time.clock() - t0\n\n      summary_writer.add_summary(summaries, global_step=step)\n      # projector.visualize_embeddings(summary_writer, pconf)\n\n      if step % 1 == 0:\n        print('step %s, loss=%s, accuracy=%s, t=%s, inputs=%s' %\n              (step, loss, accuracy, round(td, 2), fd[model.inputs].shape))\n      if step != 0 and step % args.checkpoint_frequency == 0:\n        print('checkpoint & graph meta')\n        saver.save(s, checkpoint_path, global_step=step)\n        print('checkpoint done')\n      if step != 0 and step % args.eval_frequency == 0:\n        print('evaluation at step %s' % i)\n        dev_df = ev(s, model, task.read_devset(epochs=1))\n        print('dev accuracy: %.2f' %\n              (dev_df['predictions'] == dev_df['labels']).mean())\n\n\ndef main():\n  if args.mode == 'train':\n    train()\n  elif args.mode == 'eval':\n    evaluate(task.read_devset(epochs=1))\n\n\nif __name__ == '__main__':\n  main()\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/yelp.py",
    "content": "import os\nimport pickle\n\ntrain_dir = os.path.join(os.path.curdir, 'yelp')\ndata_dir = os.path.join(train_dir, 'data')\n\nfor dir in [train_dir, data_dir]:\n  if not os.path.exists(dir):\n    os.makedirs(dir)\n\ntrainset_fn = os.path.join(data_dir, 'train.dataset')\ndevset_fn = os.path.join(data_dir, 'dev.dataset')\ntestset_fn = os.path.join(data_dir, 'test.dataset')\nvocab_fn = os.path.join(data_dir, 'vocab.pickle')\n\nreserved_tokens = 5\nunknown_id = 2\n\nvocab_size = 50001\n\n\ndef _read_dataset(fn, review_max_sentences=30, sentence_max_length=30,\n                  epochs=1):\n  c = 0\n  while 1:\n    c += 1\n    if epochs > 0 and c > epochs:\n      return\n    print('epoch %s' % c)\n    with open(fn, 'rb') as f:\n      try:\n        while 1:\n          x, y = pickle.load(f)\n\n          # clip review to specified max lengths\n          x = x[:review_max_sentences]\n          x = [sent[:sentence_max_length] for sent in x]\n\n          y -= 1\n          assert y >= 0 and y <= 4\n          yield x, y\n      except EOFError:\n        continue\n\n\ndef read_trainset(epochs=1):\n  return _read_dataset(trainset_fn, epochs=epochs)\n\n\ndef read_devset(epochs=1):\n  return _read_dataset(devset_fn, epochs=epochs)\n\n\ndef read_vocab():\n  with open(vocab_fn, 'rb') as f:\n    return pickle.load(f)\n\n\ndef read_labels():\n  return {i: i for i in range(5)}\n"
  },
  {
    "path": "hierarchical_attention_research/han_model/yelp_prepare.py",
    "content": "import argparse\nparser = argparse.ArgumentParser()\nparser.add_argument('review_path')\nargs = parser.parse_args()\n\nimport os\nimport ujson as json\nimport spacy\nimport pickle\nimport random\nfrom tqdm import tqdm\nfrom collections import defaultdict\nimport numpy as np\nfrom yelp import *\n\nen = spacy.load('en')\n\n\ndef read_reviews():\n  with open(args.review_path, 'rb') as f:\n    for line in f:\n      yield json.loads(line)\n\n\ndef build_word_frequency_distribution():\n  path = os.path.join(data_dir, 'word_freq.pickle')\n\n  try:\n    with open(path, 'rb') as freq_dist_f:\n      freq_dist_f = pickle.load(freq_dist_f)\n      print('frequency distribution loaded')\n      return freq_dist_f\n  except IOError:\n    pass\n\n  print('building frequency distribution')\n  freq = defaultdict(int)\n  for i, review in enumerate(read_reviews()):\n    doc = en.tokenizer(review['text'])\n    for token in doc:\n      freq[token.orth_] += 1\n    if i % 10000 == 0:\n      with open(path, 'wb') as freq_dist_f:\n        pickle.dump(freq, freq_dist_f)\n      print('dump at {}'.format(i))\n  return freq\n\n\ndef build_vocabulary(lower=3, n=50000):\n  try:\n    with open(vocab_fn, 'rb') as vocab_file:\n      vocab = pickle.load(vocab_file)\n      print('vocabulary loaded')\n      return vocab\n  except IOError:\n    print('building vocabulary')\n  freq = build_word_frequency_distribution()\n  top_words = list(sorted(freq.items(), key=lambda x: -x[1]))[:n - lower + 1]\n  vocab = {}\n  i = lower\n  for w, freq in top_words:\n    vocab[w] = i\n    i += 1\n  with open(vocab_fn, 'wb') as vocab_file:\n    pickle.dump(vocab, vocab_file)\n  return vocab\n\n\nUNKNOWN = 2\n\n\ndef make_data(split_points=(0.8, 0.94)):\n  train_ratio, dev_ratio = split_points\n  vocab = build_vocabulary()\n  train_f = open(trainset_fn, 'wb')\n  dev_f = open(devset_fn, 'wb')\n  test_f = open(testset_fn, 'wb')\n\n  try:\n    for review in tqdm(read_reviews()):\n      x = []\n      for sent 
in en(review['text']).sents:\n        x.append([vocab.get(tok.orth_, UNKNOWN) for tok in sent])\n      y = review['stars']\n\n      r = random.random()\n      if r < train_ratio:\n        f = train_f\n      elif r < dev_ratio:\n        f = dev_f\n      else:\n        f = test_f\n      pickle.dump((x, y), f)\n  except KeyboardInterrupt:\n    pass\n\n  train_f.close()\n  dev_f.close()\n  test_f.close()\n\n\nif __name__ == '__main__':\n  make_data()\n"
  },
  {
    "path": "kaggle-classification/.gitignore",
    "content": "# Directories to save model checkpoints\nruns/\nmodel/*\nsaved_models/*\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# Python virtual environment directory\nenv/\n\n# Don't version control data: this is the directory where data is downloaded to.\nlocal_data/\n\n# Temporary directory for hacking stuff in\ntmp/\n\n# Comet API key\ncomet_api_key.txt\n"
  },
  {
    "path": "kaggle-classification/README.md",
    "content": "# Toxic Comment Classification Kaggle Challenge\n\nThis directory is a place to play around with solutions for the\n[Toxic Comment Classification Kaggle challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge).\nThe challenge was created by the Jigsaw Conversation AI team in December 2017\nand it ends in February 2018.\n\nThese models are meant to be simple baselines created independently from the\nGoogle infrastructure.\n\n\n## To Run Locally\n\n1.  Set up a [virtualenv](https://virtualenvwrapper.readthedocs.io/en/latest/) for\n    the project (recommended, but technically optional).\n\n    Python 2:\n\n    ```\n    python -m virtualenv env\n    ```\n\n    Python 3:\n\n    ```\n    python3 -m venv env\n    ```\n\n    With either version, enter your virtual env:\n\n    ```shell\n    source env/bin/activate\n    ```\n\n2.  Install library dependencies:\n\n    ```shell\n    pip install -r requirements.txt\n    ```\n\n3.  For training locally, download the training (`train.csv`) and test\n    (`test.csv`) data from the\n    [Kaggle challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/data).\n\n    If you have [a Kaggle API Key](https://github.com/Kaggle/kaggle-api#api-credentials)\n    set up, you can use the [Kaggle API tool](https://github.com/Kaggle/kaggle-api)\n    to download these files by running:\n\n    ```shell\n    kaggle competitions download -c jigsaw-toxic-comment-classification-challenge -p ./\n    mv jigsaw-toxic-comment-classification-challenge local_data\n    for z in local_data/*.zip; do unzip -x $z -d local_data/; done\n    ```\n\n    Note: the `kaggle` command is installed by `pip` and is specified in\n    `requirements.txt`.\n\n4.  Run a model on a given class (e.g. 'toxic' or 'obscene'). There are examples\n    of how to run the model locally and using ml-engine in `bin/run_local` and\n    `bin/run` respectively.\n\n    Note: to run in Google Cloud, you will need to be authenticated with\n    Google Cloud (you can run `gcloud auth application-default login` to do\n    this) and you must have access to the cloud bucket where the data is located\n    (you can test this by running `gcloud storage ls gs://kaggle-model-experiments/`).\n\n\n## Available Models\n  * `bag_of_words` - bag of words model with a learned word-embedding layer\n  * `cnn` - a 2 layer ConvNet\n\n\n## Data\n\nCopies of the training and test data are available in Google Storage from the\nwikidetox project.\n\n* train.csv: gs://kaggle-model-experiments/train.csv\n* test.csv: gs://kaggle-model-experiments/test.csv\n"
  },
  {
    "path": "kaggle-classification/__init__.py",
    "content": ""
  },
  {
    "path": "kaggle-classification/bin/cancel-job",
    "content": "#!/bin/bash\n\ngcloud ml-engine jobs cancel $1\n"
  },
  {
    "path": "kaggle-classification/bin/ls-jobs",
    "content": "#!/bin/bash\n\nDATE=`date '+%Y-%m-%d'`\n\ngcloud ml-engine jobs list | grep $DATE\n"
  },
  {
    "path": "kaggle-classification/bin/run",
    "content": "#!/bin/bash\n\n#\n# A script to train the kaggle model remotely using ml-engine.\n#\n# To run with default hyperparameters from the kaggle-classification directory just enter:\n# './bin/run'\n#\n# To run with hyperparameter tuning, enter:\n# './bin/run -c hparam_config.yaml'\n# \n#\n# Setup Steps:\n# 1. Install the gcloud SDK\n# 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]`\n# 3. Put the train and test data in Cloud Storage, `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/`\n#\n\n# Edit these!\nBUCKET_NAME=kaggle-model-experiments\nCONFIG=gpu_config.yaml\nJOB_NAME=${USER}_kaggle_training\n# Note: this must be compatible with cells that have GPUs. us-central1 works.\n# See: https://cloud.google.com/ml-engine/docs/using-gpus\nREGION=us-central1\nDATE=`date '+%Y%m%d_%H%M%S'`\nOUTPUT_PATH=gs://${BUCKET_NAME}/models/${USER}/${DATE}\n\nwhile getopts :c:h opt; do\ncase ${opt} in\nh) \n    echo \"Usage: run [-c config_filename.yaml]\"\n    echo \"Flags: \"\n    echo -e \" -c Specify a config file (e.g. use hparam_config to enable hyperparameter tuning)\"\n    exit 0;;\nc) \n    echo \"Using custom config ${OPTARG}\"\n    CONFIG=${OPTARG};;\n:)\n    echo \"Error: ${OPTARG} requires an argument.\"\n    echo \"Use 'run -h' for help.\"\n    exit 1;;\n\\?) \n    echo \"Invalid flag. 
Use 'run -h' for help.\"\n    exit 1;;\nesac\ndone\n\n\necho \"Writing to $OUTPUT_PATH\"\n\n# Remote\ngcloud ml-engine jobs submit training ${JOB_NAME}_${DATE} \\\n    --job-dir ${OUTPUT_PATH} \\\n    --runtime-version 1.4 \\\n    --config ${CONFIG} \\\n    --module-name trainer.model \\\n    --package-path trainer/ \\\n    --region $REGION \\\n    --verbosity debug \\\n    -- \\\n    --train_data gs://${BUCKET_NAME}/train.csv \\\n    --y_class toxic \\\n    --train_steps 5000 \\\n    --saved_model_dir gs://${BUCKET_NAME}/saved_graph/${USER}/${DATE} \\\n    --model cnn\n\n\necho \"You can view the tensorboard for this job with the command:\"\necho \"\"\necho -e \"\\t tensorboard --logdir=${OUTPUT_PATH}\"\necho \"\"\necho \"And on your browser navigate to:\"\necho \"\"\necho -e \"\\t http://localhost:6006/#scalars\"\necho \"\"\necho \"This will populate after a model checkpoint is saved.\"\necho \"\"\n"
  },
  {
    "path": "kaggle-classification/bin/run_keras.sh",
    "content": "#!/bin/bash\n\n#\n# A script to train the kaggle model remotely using ml-engine.\n#\n# Setup Steps:\n# 1. Install the gcloud SDK\n# 2. Authenticate with the GCP project you want to use, `gcloud config set project [my-project]`\n# 3. Put the train and test data in Cloud Storage,\n#      `gcloud storage cp [DATA_FILE] gs://[BUCKET_NAME]/resources`\n#\n\n# Edit these!\nBUCKET_NAME=kaggle-model-experiments\nJOB_NAME=${USER}_kaggle_training\nREGION=us-east1\n\nINPUT_PATH=gs://${BUCKET_NAME}/resources\nDATE=`date '+%Y%m%d_%H%M%S'`\nOUTPUT_PATH=gs://${BUCKET_NAME}/keras_runs/${USER}/${DATE}\nLOG_PATH=${OUTPUT_PATH}/logs/\nHPARAM_CONFIG=keras_hparam_config.yaml\nCOMET_KEY_FILE='comet_api_key.txt'\nCOMET_KEY=$(cat ${COMET_KEY_FILE})\nCOMET_PROJECT_NAME='compare-models'\n\necho \"Writing to $OUTPUT_PATH\"\n\n# Remote\ngcloud ml-engine jobs submit training ${JOB_NAME}_${DATE} \\\n    --job-dir=$OUTPUT_PATH \\\n    --runtime-version=1.8 \\\n    --module-name=keras_trainer.model \\\n    --package-path=keras_trainer \\\n    --region=$REGION \\\n    --verbosity=debug \\\n    --config=${HPARAM_CONFIG} \\\n    -- \\\n    --train_path=${INPUT_PATH}/train.csv \\\n    --test_path=${INPUT_PATH}/validation.csv \\\n    --embeddings_path=${INPUT_PATH}/glove.6B/glove.6B.300d.txt \\\n    --log_path=${LOG_PATH} \\\n    --comet_key=${COMET_KEY} \\\n    --comet_project_name=${COMET_PROJECT_NAME} \\\n    --model_type=single_layer_cnn\n\necho \"You can view the tensorboard for this job with the command:\"\necho \"\"\necho -e \"\\t tensorboard --logdir=${LOG_PATH}\"\necho \"\"\necho \"And on your browser navigate to:\"\necho \"\"\necho -e \"\\t http://localhost:6006/#scalars\"\necho \"\"\necho \"This will populate after a model checkpoint is saved.\"\necho \"\"\n"
  },
  {
    "path": "kaggle-classification/bin/run_keras_local.sh",
    "content": "#!/bin/bash\n\nDATE=`date '+%Y%m%d_%H%M%S'`\nOUTPUT_PATH=runs/${DATE}\nINPUT_PATH=local_data\nLOG_PATH=${OUTPUT_PATH}/logs/\nCOMET_KEY_FILE='comet_api_key.txt'\nCOMET_KEY=$(cat ${COMET_KEY_FILE})\nCOMET_PROJECT_NAME='compare-models'\n\necho \"You can view the tensorboard for this job with the command:\"\necho \"\"\necho -e \"\\t tensorboard --logdir=${LOG_PATH}\"\necho \"\"\necho \"And on your browser navigate to:\"\necho \"\"\necho -e \"\\t http://localhost:6006/#scalars\"\necho \"\"\necho \"This will populate after a model checkpoint is saved.\"\necho \"\"\n\npython -m keras_trainer.model \\\n       --train_path=${INPUT_PATH}/train.csv \\\n       --test_path=${INPUT_PATH}/validation.csv \\\n       --embeddings_path=${INPUT_PATH}/glove.6B/glove.6B.100d.txt \\\n       --job-dir=${OUTPUT_PATH} \\\n       --log_path=${LOG_PATH} \\\n       --comet_key=${COMET_KEY} \\\n       --comet_project_name=${COMET_PROJECT_NAME} \\\n       --model_type=rnn\n"
  },
  {
    "path": "kaggle-classification/bin/run_local",
    "content": "#!/bin/bash\n\n#\n# A script to train the kaggle model locally.\n# Assumes that train.csv and test.csv are downloaded into the local_data/\n# directory.\n#\nDATE=`date '+%Y%m%d_%H%M%S'`\n\ngcloud ml-engine local train \\\n     --module-name=trainer.model \\\n     --package-path=trainer \\\n     --job-dir=model/${DATE} -- \\\n     --train_data=local_data/train.csv \\\n     --y_class=toxic \\\n     --train_steps=100\n"
  },
  {
    "path": "kaggle-classification/bin/stream-logs",
    "content": "#!/bin/bash\n\ngcloud ml-engine jobs stream-logs $1\n"
  },
  {
    "path": "kaggle-classification/config.yaml",
    "content": "trainingInput:\n  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.\n  scaleTier: BASIC_GPU\n  ## Custom scaleTier needed for using > 1 GPU machines.\n  # scaleTier: CUSTOM\n  # masterType: complex_model_m_gpu\n  # workerType: complex_model_m_gpu\n  # parameterServerType: large_model\n  # workerCount: 9\n  # parameterServerCount: 3\n"
  },
  {
    "path": "kaggle-classification/gpu_config.yaml",
    "content": "trainingInput:\n  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.\n  scaleTier: BASIC_GPU\n  ## Custom scaleTier needed for using > 1 GPU machines.\n  # scaleTier: CUSTOM\n  # masterType: complex_model_m_gpu\n  # workerType: complex_model_m_gpu\n  # parameterServerType: large_model\n  # workerCount: 9\n  # parameterServerCount: 3\n"
  },
  {
    "path": "kaggle-classification/hparam_config.yaml",
    "content": "trainingInput:\n  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.\n  scaleTier: BASIC_GPU\n  ## Custom scaleTier needed for using > 1 GPU machines.\n  # scaleTier: CUSTOM\n  # masterType: complex_model_m_gpu\n  # workerType: complex_model_m_gpu\n  # parameterServerType: large_model\n  # workerCount: 9\n  # parameterServerCount: 3\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: auc\n    maxTrials: 100\n    maxParallelTrials: 10\n    enableTrialEarlyStopping: TRUE\n    params:\n      - parameterName: embedding_size\n        type: INTEGER\n        minValue: 50\n        maxValue: 200\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: num_filters\n        type: INTEGER\n        minValue: 10\n        maxValue: 200\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: dropout_keep_prob\n        type: DOUBLE\n        minValue: 0.5\n        maxValue: 1\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: learning_rate\n        type: DOUBLE\n        minValue: 0.000001\n        maxValue: 0.1\n        scaleType: UNIT_LOG_SCALE\n"
  },
  {
    "path": "kaggle-classification/keras_hparam_config.yaml",
    "content": "trainingInput:\n  ## BASIC_GPU uses single NVIDIA Tesla K80 GPU.\n  pythonVersion: '3.5'\n  scaleTier: BASIC_GPU\n  ## Custom scaleTier needed for using > 1 GPU machines.\n  # scaleTier: CUSTOM\n  # masterType: complex_model_m_gpu\n  # workerType: complex_model_m_gpu\n  # parameterServerType: large_model\n  # workerCount: 9\n  # parameterServerCount: 3\n  hyperparameters:\n    goal: MAXIMIZE\n    hyperparameterMetricTag: val_auc_roc\n    maxTrials: 20\n    maxParallelTrials: 3\n    enableTrialEarlyStopping: TRUE\n    params:\n      - parameterName: learning_rate \n        type: DOUBLE\n        minValue: 0.00005\n        maxValue: 0.1\n        scaleType: UNIT_LOG_SCALE\n      - parameterName: dropout_rate\n        type: DOUBLE\n        minValue: 0\n        maxValue: 1\n        scaleType: UNIT_LINEAR_SCALE\n      - parameterName: batch_size\n        type: DISCRETE\n        discreteValues:\n        - 16\n        - 32\n        - 64\n        - 128\n        - 256\n"
  },
  {
    "path": "kaggle-classification/keras_trainer/__init__.py",
    "content": ""
  },
  {
    "path": "kaggle-classification/keras_trainer/base_model.py",
    "content": "\"\"\"Base model class used by the ModelRunner\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom abc import ABCMeta, abstractmethod\nfrom keras.layers import Input\nfrom keras.models import Model\n\n\nclass BaseModel(metaclass=ABCMeta):\n  \"\"\"Base class for model runner\"\"\"\n\n  @abstractmethod\n  def get_model(self) -> Model:\n    raise NotImplementedError('Method get_model needs to be implemented.')\n"
  },
  {
    "path": "kaggle-classification/keras_trainer/cnn_with_attention.py",
    "content": "\"\"\"Model class for a single layer CNN\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom keras.layers import Conv1D\nfrom keras.layers import Dense\nfrom keras.layers import Dropout\nfrom keras.layers import Embedding\nfrom keras.layers import Flatten\nfrom keras.layers import Input\nfrom keras.layers import AveragePooling1D\nfrom keras.layers import Activation\nfrom keras.layers import Concatenate\nfrom keras.layers import Multiply\nfrom keras.models import Model\nfrom keras.layers import Permute\nfrom keras_trainer import base_model\nfrom keras.layers import Activation\nfrom keras_trainer.custom_metrics import auc_roc\n\n\nclass CNNWithAttention(base_model.BaseModel):\n  \"\"\"Single Layer Based CNN\n\n  hparams:\n    embedding_dim\n    vocab_size\n    sequence_length\n    dropout_rate\n    train_embedding\n  \"\"\"\n\n  def __init__(self, embeddings_matrix, hparams, labels):\n    self.embeddings_matrix = embeddings_matrix\n    self.hparams = hparams\n    self.labels = labels\n    self.num_labels = len(labels)\n\n  def get_model(self):\n    I = Input(shape=(self.hparams.sequence_length,), dtype='float32')\n    E = Embedding(\n        self.hparams.vocab_size,\n        self.hparams.embedding_dim,\n        weights=[self.embeddings_matrix],\n        input_length=self.hparams.sequence_length,\n        trainable=self.hparams.train_embedding)(\n            I)\n    C = []\n    A = []\n    P = []\n    for i, size in enumerate(self.hparams.filter_sizes):\n      C.append(\n          Conv1D(\n              self.hparams.num_filters[i],\n              size,\n              activation='relu',\n              padding='same')(E))\n      A.append(\n          Dense(self.hparams.attention_intermediate_size,\n                activation='relu')(C[i]))\n      A[i] = Dense(1, use_bias=False)(A[i])\n      # Permute trick to apply softmax to second to last layer.\n      A[i] = Permute((2, 
1))(A[i])\n      A[i] = Activation('softmax')(A[i])\n      A[i] = Permute((2, 1))(A[i])\n      P.append(Multiply()([A[i], C[i]]))\n      P[i] = AveragePooling1D(\n          self.hparams.sequence_length, padding='same')(\n              P[i])\n    X = Concatenate(axis=-1)(P)\n    X = Flatten()(X)\n    X = Dropout(self.hparams.dropout_rate)(X)\n    X = Dense(128, activation='relu')(X)\n    X = Dropout(self.hparams.dropout_rate)(X)\n    Output = Dense(self.num_labels, activation='sigmoid', name='outputs')(X)\n\n    model = Model(inputs=I, outputs=Output)\n    model.compile(\n        optimizer='rmsprop',\n        loss='binary_crossentropy',\n        metrics=['accuracy', auc_roc])\n    print(model.summary())\n    return model\n"
  },
  {
    "path": "kaggle-classification/keras_trainer/custom_metrics.py",
    "content": "\"\"\"Custom metrics used by Keras models.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\n\ndef auc_roc(y_true, y_pred):\n  # any tensorflow metric\n  y_true = tf.to_int32(tf.greater(y_true, 0.5))\n  value, update_op = tf.metrics.auc(y_true, y_pred)\n\n  # find all variables created for this metric\n  metric_vars = [\n      i for i in tf.local_variables() if 'auc_roc' in i.name.split('/')[1]\n  ]\n\n  # Add metric variables to GLOBAL_VARIABLES collection.\n  # They will be initialized for new session.\n  for v in metric_vars:\n    tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)\n\n  # force update metric values\n  with tf.control_dependencies([update_op]):\n    value = tf.identity(value)\n    return value\n"
  },
  {
    "path": "kaggle-classification/keras_trainer/model.py",
    "content": "\"\"\"Classifiers for the Toxic Comment Classification Kaggle challenge, https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge\n\nTo run locally:\n  python keras-trainer/model.py\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport argparse\nimport json\nimport numpy as np\nimport pandas as pd\nimport os\nimport os.path\nfrom comet_ml import Experiment\nimport tensorflow as tf\nfrom keras.callbacks import EarlyStopping\nfrom keras.callbacks import ModelCheckpoint\nfrom keras.callbacks import TensorBoard\nfrom keras.models import load_model\nfrom keras.preprocessing.sequence import pad_sequences\nfrom keras.preprocessing.text import Tokenizer\nfrom os.path import expanduser\nfrom sklearn import metrics\nfrom tensorflow.python.framework.errors_impl import NotFoundError\nfrom keras_trainer.cnn_with_attention import CNNWithAttention\nfrom keras_trainer.single_layer_cnn import SingleLayerCnn\nfrom keras_trainer.rnn import RNNModel\nfrom keras_trainer.custom_metrics import auc_roc\nfrom keras_trainer.base_model import BaseModel\nfrom typing import Dict, Type\n\nFLAGS = None\n\nTEMPORARY_MODEL_PATH = 'model.h5'\n\nVALID_MODELS = {\n    'cnn_with_attention': CNNWithAttention,\n    'single_layer_cnn': SingleLayerCnn,\n    'rnn': RNNModel\n}  # type: Dict[str, Type[BaseModel]]\n\nDEFAULT_HPARAMS = tf.contrib.training.HParams(\n    learning_rate=0.00005,\n    dropout_rate=0.5,\n    batch_size=128,\n    epochs=1,\n    sequence_length=250,\n    embedding_dim=300,\n    train_embedding=False,\n    model_type='single_layer_cnn',\n    filter_sizes=[3, 4, 5],\n    num_filters=[128, 128, 128],\n    attention_intermediate_size=128)\n\n\nclass ModelRunner():\n  \"\"\"Toxicity model using CNN + Attention\"\"\"\n\n  def __init__(self, job_dir, embeddings_path, log_path, hparams, labels):\n    if os.path.exists(TEMPORARY_MODEL_PATH):\n      raise FileExistsError('The following 
file path already exists: {}'.format(\n          TEMPORARY_MODEL_PATH))\n\n    self.job_dir = job_dir\n    self.model_path = os.path.join(job_dir, 'model.h5')\n    self.embeddings_path = embeddings_path\n    self.log_path = log_path\n    self.hparams = hparams\n    self.labels = [l.strip() for l in labels.split(',')]\n    print('Setting up tokenizer...')\n    self.tokenizer = self._setup_tokenizer()\n    print('Setting up embedding matrix...')\n    self.embeddings_matrix = self._setup_embeddings_matrix()\n    print('Loading model...')\n    self._load_model()\n\n  def train(self, train):\n    if self.hparams.model_type in VALID_MODELS:\n      model = VALID_MODELS[self.hparams.model_type](\n          self.embeddings_matrix, self.hparams,\n          self.labels).get_model()  # type: BaseModel\n    else:\n      raise ValueError('You have specified an invalid model type.')\n\n    train_comment = self._prep_texts(train['comment_text'])\n    train_labels = np.array(list(zip(*[train[label] for label in self.labels])))\n\n    callbacks = [\n        ModelCheckpoint(\n            TEMPORARY_MODEL_PATH, save_best_only=True, verbose=True),\n        EarlyStopping(monitor='val_loss', mode='auto'),\n        TensorBoard(self.log_path)\n    ]\n\n    model.fit(\n        x=train_comment,\n        y=train_labels,\n        batch_size=int(self.hparams.batch_size),\n        epochs=self.hparams.epochs,\n        validation_split=0.1,\n        callbacks=callbacks,\n        verbose=2)  # Output one line per epoch\n\n    # Necessary because we can't save h5 files to cloud storage directly via\n    # Checkpoint.\n    tf.gfile.MakeDirs(self.job_dir)\n    tf.gfile.Copy(TEMPORARY_MODEL_PATH, self.model_path, overwrite=True)\n    tf.gfile.Remove(TEMPORARY_MODEL_PATH)\n    print('Saved model to {}'.format(self.model_path))\n\n    self._load_model()\n\n  def predict(self, texts):\n    data = self._prep_texts(texts)\n    return self.model.predict(data)\n\n  def score_metric(self, data, metric_name, 
metric_fn):\n    \"\"\"Prints metric scores.\n\n    Args:\n      data: Dataset containing 'comment_text' column, that will be used to get\n        predictions, as well as label columns to compare the predictions\n        against.\n      metric_name (str): String to use when printing.\n      metric_fn: function that takes labels and predictions and outputs a score\n    \"\"\"\n\n    predictions = self.predict(data['comment_text'])\n    # Get an array where each element is a list of all the labels for the\n    # specific instance.\n    agg = {}\n    for label_idx, label in enumerate(self.labels):\n      labels = list((data[label] > 0.5).astype(int))\n      preds = predictions[:, label_idx]  # label and pred indicies better match\n      score = metric_fn(labels, preds)\n      agg[label] = score\n    print('{}: {}'.format(metric_name, agg))\n    if len(agg) > 1:\n      print('Mean {}: {}'.format(metric_name, np.mean(list(agg.values()))))\n\n  def score_auc(self, data):\n    self.score_metric(\n        data, 'ROC AUC', lambda l, p: metrics.roc_auc_score(l, p, average=None))\n\n  def score_precision(self, data):\n    self.score_metric(\n        data, 'Precision', lambda l, p: metrics.precision_score(\n            l, (p > 0.5).astype(int)))\n\n  def score_recall(self, data):\n    self.score_metric(\n        data,\n        'Recall', lambda l, p: metrics.recall_score(l, (p > 0.5).astype(int)))\n\n  def _prep_texts(self, texts):\n    return pad_sequences(\n        self.tokenizer.texts_to_sequences(texts),\n        maxlen=self.hparams.sequence_length)\n\n  def _load_model(self):\n    try:\n      tf.gfile.Copy(self.model_path, TEMPORARY_MODEL_PATH, overwrite=True)\n      self.model = load_model(\n          TEMPORARY_MODEL_PATH, custom_objects={'auc_roc': auc_roc})\n      tf.gfile.Remove(TEMPORARY_MODEL_PATH)\n      print('Model loaded from: {}'.format(self.model_path))\n    except NotFoundError:\n      print('Could not load model at: {}'.format(self.model_path))\n\n  def 
_setup_tokenizer(self):\n    words = []\n    with tf.gfile.Open(self.embeddings_path, 'r') as f:\n      for line in f:\n        words.append(line.split()[0])\n    tokenizer = Tokenizer(lower=True, oov_token='<unk>')\n    tokenizer.fit_on_texts(words)\n    self.hparams.vocab_size = len(tokenizer.word_index) + 1\n    return tokenizer\n\n  def _setup_embeddings_matrix(self):\n    embeddings_matrix = np.zeros((self.hparams.vocab_size,\n                                  self.hparams.embedding_dim))\n    with tf.gfile.Open(self.embeddings_path, 'r') as f:\n      for line in f:\n        values = line.split()\n        word = values[0]\n        if word in self.tokenizer.word_index:\n          word_idx = self.tokenizer.word_index[word]\n          word_embedding = np.asarray(values[1:], dtype='float32')\n          embeddings_matrix[word_idx] = word_embedding\n    embeddings_matrix[self.hparams.vocab_size - 1] = embeddings_matrix.mean(\n        axis=0)\n    return embeddings_matrix\n\n\nif __name__ == '__main__':\n\n  parser = argparse.ArgumentParser()\n  parser.add_argument(\n      '--train_path',\n      type=str,\n      default='local_data/train.csv',\n      help='Path to the training data.')\n  parser.add_argument(\n      '--test_path',\n      type=str,\n      default='local_data/validation.csv',\n      help='Path to the test data.')\n  parser.add_argument(\n      '--embeddings_path',\n      type=str,\n      default='local_data/glove.6B/glove.6B.100d.txt',\n      help='Path to the embeddings.')\n  parser.add_argument(\n      '--job-dir', type=str, default='local_data/', help='Path to model file.')\n  parser.add_argument(\n      '--log_path',\n      type=str,\n      default='local_data/logs/',\n      help='Path to write tensorboard logs.')\n  parser.add_argument(\n      '--comet_key',\n      type=str,\n      default=None,\n      help='Path to file containing comet.ml api key. 
Set to None to disable comet.ml.'\n  )\n  parser.add_argument(\n      '--comet_project_name',\n      type=str,\n      default=None,\n      help='Name of comet project that tracks results. Must be set if comet_key is.'\n  )\n  parser.add_argument(\n      '--labels',\n      default='toxic,severe_toxic,obscene,threat,insult,identity_hate',\n      help='A comma separated list of labels to predict.')\n  parser.add_argument(\n      '--model_type',\n      default='single_layer_cnn',\n      help='Model type. Valid choices are {}'.format(list(VALID_MODELS.keys())))\n\n  # Hyper-parameters\n  parser.add_argument(\n      '--learning_rate', type=float, default=0.00005, help='Learning rate.')\n  parser.add_argument(\n      '--dropout_rate', type=float, default=0.5, help='Dropout rate.')\n  parser.add_argument('--batch_size', type=int, default=64, help='Batch size.')\n\n  FLAGS = parser.parse_args()\n\n  hparams = DEFAULT_HPARAMS\n  hparams.learning_rate = FLAGS.learning_rate\n  hparams.dropout_rate = FLAGS.dropout_rate\n  hparams.batch_size = FLAGS.batch_size\n  hparams.model_type = FLAGS.model_type\n\n  if FLAGS.comet_key:\n    experiment = Experiment(\n        api_key=FLAGS.comet_key,\n        project_name=FLAGS.comet_project_name,\n        team_name='jigsaw',\n        auto_param_logging=False,\n        parse_args=False)\n    experiment.log_multiple_params(hparams.values())\n    experiment.log_parameter('train_data_path', FLAGS.train_path)\n    experiment.log_parameter('test_data_path', FLAGS.test_path)\n    experiment.log_parameter('embeddings_path', FLAGS.embeddings_path)\n    experiment.log_parameter('model_path', FLAGS.job_dir)\n    experiment.log_parameter('model', hparams.model_type)\n\n  # Used to scope logs to a given trial (when hyper param tuning) so that they\n  # don't run over each other. 
When running locally it will just use the passed\n  # in log path.\n  trial_log_path = os.path.join(\n      FLAGS.log_path,\n      json.loads(os.environ.get('TF_CONFIG', '{}')).get('task', {}).get(\n          'trial', ''))\n\n  model = ModelRunner(\n      job_dir=FLAGS.job_dir,\n      embeddings_path=FLAGS.embeddings_path,\n      log_path=trial_log_path,\n      hparams=hparams,\n      labels=FLAGS.labels)\n  with tf.gfile.Open(FLAGS.train_path, 'rb') as f:\n    train = pd.read_csv(f, encoding='utf-8')\n  if FLAGS.comet_key:\n    experiment.log_dataset_hash(train)\n  model.train(train)\n\n  with tf.gfile.Open(FLAGS.test_path, 'rb') as f:\n    test_data = pd.read_csv(f, encoding='utf-8')\n  if FLAGS.comet_key:\n    experiment.log_metric('test_auc', model.score_auc(test_data))\n\n  model.predict(['This sentence is benign'])\n"
  },
  {
    "path": "kaggle-classification/keras_trainer/rnn.py",
    "content": "\"\"\"RNN\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom keras.layers import Input, GRU, Dense, Embedding, Dropout, Bidirectional, TimeDistributed, Flatten, Dot\nfrom keras.models import Model\nfrom keras_trainer import base_model\nfrom keras_trainer.custom_metrics import auc_roc\n\n\nclass RNNModel(base_model.BaseModel):\n  \"\"\" RNN\n\n  hparams:\n    embedding_dim\n    vocab_size\n    train_embedding\n  \"\"\"\n\n  def __init__(self, embeddings_matrix, hparams, labels):\n    self.embeddings_matrix = embeddings_matrix\n    self.hparams = hparams\n    self.labels = labels\n    self.num_labels = len(labels)\n\n  def get_model(self):\n    sequence_length = self.hparams.sequence_length\n\n    I = Input(shape=(sequence_length,), dtype='float32')\n    E = Embedding(\n        self.hparams.vocab_size,\n        self.hparams.embedding_dim,\n        weights=[self.embeddings_matrix],\n        input_length=sequence_length,\n        trainable=self.hparams.train_embedding)(\n            I)\n    H = Bidirectional(GRU(128, return_sequences=True))(E)\n    A = TimeDistributed(\n        Dense(128, activation='relu'), input_shape=(sequence_length, 256))(\n            H)\n    A = TimeDistributed(Dense(1, activation='softmax'))(H)\n    X = Dot((1, 1))([H, A])\n    X = Flatten()(X)\n    X = Dense(128, activation='relu')(X)\n    X = Dropout(self.hparams.dropout_rate)(X)\n    Output = Dense(self.num_labels, activation='sigmoid')(X)\n\n    model = Model(inputs=I, outputs=Output)\n    model.compile(\n        optimizer='rmsprop',\n        loss='binary_crossentropy',\n        metrics=['accuracy', auc_roc])\n\n    print(model.summary())\n    return model\n"
  },
  {
    "path": "kaggle-classification/keras_trainer/single_layer_cnn.py",
    "content": "\"\"\"Model class for a single layer CNN\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom keras.layers import Conv1D\nfrom keras.layers import Dense\nfrom keras.layers import Dropout\nfrom keras.layers import Embedding\nfrom keras.layers import Flatten\nfrom keras.layers import Input\nfrom keras.layers import MaxPooling1D\nfrom keras.layers import Activation\nfrom keras.layers import Concatenate\nfrom keras.optimizers import Adam\nfrom keras.models import Model\nfrom keras_trainer import base_model\nfrom keras_trainer.custom_metrics import auc_roc\n\n\nclass SingleLayerCnn(base_model.BaseModel):\n  \"\"\"Single Layer Based CNN\n\n  hparams:\n    embedding_dim\n    vocab_size\n    sequence_length\n    dropout_rate\n    train_embedding\n  \"\"\"\n\n  def __init__(self, embeddings_matrix, hparams, labels):\n    self.embeddings_matrix = embeddings_matrix\n    self.hparams = hparams\n    self.labels = labels\n    self.num_labels = len(labels)\n\n  def get_model(self) -> Model:\n    I = Input(shape=(self.hparams.sequence_length,), dtype='float32')\n    E = Embedding(\n        self.hparams.vocab_size,\n        self.hparams.embedding_dim,\n        weights=[self.embeddings_matrix],\n        input_length=self.hparams.sequence_length,\n        trainable=self.hparams.train_embedding)(\n            I)\n    X5 = Conv1D(128, 5, activation='relu', padding='same')(E)\n    X5 = MaxPooling1D(self.hparams.sequence_length, padding='same')(X5)\n    X4 = Conv1D(128, 4, activation='relu', padding='same')(E)\n    X4 = MaxPooling1D(self.hparams.sequence_length, padding='same')(X4)\n    X3 = Conv1D(128, 3, activation='relu', padding='same')(E)\n    X3 = MaxPooling1D(self.hparams.sequence_length, padding='same')(X3)\n    X = Concatenate(axis=-1)([X5, X4, X3])\n    X = Flatten()(X)\n    X = Dropout(self.hparams.dropout_rate)(X)\n    X = Dense(128, activation='relu')(X)\n    X = 
Dropout(self.hparams.dropout_rate)(X)\n    Output = Dense(self.num_labels, activation='sigmoid', name='outputs')(X)\n\n    model = Model(inputs=I, outputs=Output)\n    model.compile(\n        optimizer=Adam(lr=self.hparams.learning_rate),\n        loss='binary_crossentropy',\n        metrics=['accuracy', auc_roc])\n    print(model.summary())\n    return model\n"
  },
  {
    "path": "kaggle-classification/requirements.txt",
    "content": "absl-py==0.1.9\nastor==0.6.2\nbleach==3.3.0\ncertifi==2024.7.4\nchardet==3.0.4\ncomet-ml==1.0.8\nenum34==1.1.6\nfutures==3.1.1\ngast==0.2.0\ngrpcio==1.53.2\nh5py==2.7.1\nhtml5lib==0.999999999\nidna==3.7\nkaggle==1.0.5\nKeras==2.13.1\nMarkdown==2.6.11\nmypy==0.600\nnltk==3.9\nnumpy==1.22.0\npandas==0.22.0\nPillow==10.3.0\nprotobuf==3.18.3\npython-dateutil==2.6.1\npytz==2017.3\nPyYAML==5.4\nrequests==2.32.2\nscikit-learn==1.5.0\nscipy==1.10.0\nsix==1.11.0\nsklearn==0.0\ntensorboard==1.8.0\ntensorflow==2.12.1\ntensorflow-tensorboard==1.5.1\ntermcolor==1.1.0\ntflearn==0.3.2\ntyped-ast==1.1.0\nurllib3==1.26.18\nwebsocket-client==0.47.0\nWerkzeug==3.0.3\nwurlitzer==1.0.2\n"
  },
  {
    "path": "kaggle-classification/setup.py",
    "content": "from setuptools import find_packages\nfrom setuptools import setup\n\nREQUIRED_PACKAGES = [\n    'tflearn>=0.3.2', 'Keras==2.13.1', 'h5py==2.7.1', 'comet-ml==1.0.8',\n    'nltk>=3.3'\n]\n\nsetup(\n    name='trainer',\n    version='0.1',\n    install_requires=REQUIRED_PACKAGES,\n    packages=find_packages(),\n    include_package_data=True,\n    description='tflearn.')\n\nsetup(\n    name='keras_trainer',\n    version='0.1',\n    install_requires=REQUIRED_PACKAGES,\n    packages=find_packages(),\n    include_package_data=True,\n    description='tflearn.')\n\nsetup(\n    name='tf_trainer',\n    version='0.1',\n    install_requires=REQUIRED_PACKAGES,\n    packages=find_packages(),\n    include_package_data=True,\n    description='tflearn.')\n"
  },
  {
    "path": "kaggle-classification/trainer/__init__.py",
    "content": ""
  },
  {
    "path": "kaggle-classification/trainer/model.py",
    "content": "\"\"\"Classifiers for the Toxic Comment Classification Kaggle challenge, https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge\n\nTo run locally:\n  python trainer/model.py --train_data=train.csv --predict_data=test.csv\n  --y_class=toxic\n\nTo run locally using Cloud ML Engine:\n  gcloud ml-engine local train \\\n        --module-name=trainer.model \\\n        --package-path=trainer \\\n        --job-dir=model -- \\\n        --train_data=train.csv \\\n        --predict_data=test.csv \\\n        --y_class=toxic \\\n        --train_steps=100\n\nTo run TensorBoard locally:\n  tensorboard --logdir=model/\n\nThen visit http://localhost:6006/ to see the dashboard.\n\"\"\"\n\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport argparse\nimport os\nimport sys\nimport pandas as pd\nimport tensorflow as tf\nfrom sklearn import metrics\nfrom trainer import wikidata\nfrom collections import namedtuple\n\nfrom tensorflow.contrib.training.python.training import hparam\n\nFLAGS = None\n\n# Data Params\nTRAIN_PERCENT = .8  # Percent of data to allocate to training\nDATA_SEED = 48173  # Random seed used for splitting the data into train/test\nMAX_LABEL = 2\nMAX_DOCUMENT_LENGTH = 500  # Max length of each comment in words\n\n# CNN parameters\nDEFAULT_FILTER_SIZES = [2, 3, 4, 5]\n\n# Bag of Word parameters\nBOWParams = namedtuple(\"BOWParams\", [\"EMBEDDING_SIZE\"])\nBOW_PARAMS = BOWParams(EMBEDDING_SIZE=20)\n\nWORDS_FEATURE = \"words\"  # Name of the input words feature.\nMODEL_LIST = [\"bag_of_words\", \"cnn\"]  # Possible models\n\n# Training Params\nTRAIN_SEED = 9812  # Random seed used to initialize training\nBATCH_SIZE = 128\n\n\ndef estimator_spec_for_softmax_classification(logits, labels, mode,\n                                              learning_rate):\n  \"\"\"Depending on the value of mode, different EstimatorSpec arguments are required.\n\n  For mode == ModeKeys.TRAIN: required fields are loss and 
train_op.\n  For mode == ModeKeys.EVAL: required field is loss.\n  For mode == ModeKeys.PREDICT: required fields are predictions.\n\n  Returns EstimatorSpec instance for softmax classification.\n  \"\"\"\n  predicted_classes = tf.argmax(logits, axis=1)\n  predicted_probs = tf.nn.softmax(logits, name=\"softmax_tensor\")\n\n  predictions = {\n      # Holds the raw logit values\n      \"logits\": logits,\n\n      # Holds the class id (0,1) representing the model's prediction of the most\n      # likely species for this example.\n      \"classes\": predicted_classes,\n\n      # Holds the probabilities for each prediction\n      \"probs\": predicted_probs,\n  }\n\n  # Represents an output of a model that can be served.\n  export_outputs = {\n      \"output\": tf.estimator.export.ClassificationOutput(scores=predicted_probs)\n  }\n\n  # PREDICT Mode\n  if mode == tf.estimator.ModeKeys.PREDICT:\n    return tf.estimator.EstimatorSpec(\n        mode=mode, predictions=predictions, export_outputs=export_outputs)\n\n  # Calculate loss for both TRAIN and EVAL modes\n  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)\n\n  eval_metric_ops = {\n      \"accuracy\":\n          tf.metrics.accuracy(\n              labels=labels, predictions=predicted_classes, name=\"acc_op\"),\n      \"auc\":\n          tf.metrics.auc(\n              labels=labels, predictions=predicted_classes, name=\"auc_op\"),\n  }\n\n  # Add summary ops to the graph. 
These metrics will be tracked and graphed\n  # on each checkpoint by TensorBoard.\n  tf.summary.scalar(\"accuracy\", eval_metric_ops[\"accuracy\"][1])\n  tf.summary.scalar(\"auc\", eval_metric_ops[\"auc\"][1])\n\n  # TRAIN Mode\n  if mode == tf.estimator.ModeKeys.TRAIN:\n    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())\n    logging_hook = tf.train.LoggingTensorHook(\n        tensors={\"loss\": loss}, every_n_iter=50)\n\n    return tf.estimator.EstimatorSpec(\n        mode=mode,\n        loss=loss,\n        train_op=train_op,\n        training_hooks=[logging_hook],\n        predictions={\"loss\": loss},\n        export_outputs=export_outputs,\n        eval_metric_ops=eval_metric_ops)\n\n  # EVAL Mode\n  assert mode == tf.estimator.ModeKeys.EVAL\n\n  return tf.estimator.EstimatorSpec(\n      mode=mode,\n      loss=loss,\n      predictions=predictions,\n      eval_metric_ops=eval_metric_ops,\n      export_outputs=export_outputs)\n\n\ndef get_cnn_model(embedding_size, num_filters, dropout_keep_prob):\n\n  def cnn_model(features, labels, mode):\n    filter_sizes = DEFAULT_FILTER_SIZES\n\n    with tf.name_scope(\"embedding\"):\n      W = tf.Variable(\n          tf.random_uniform([n_words, embedding_size], -1.0, 1.0), name=\"W\")\n\n      embedded_chars = tf.nn.embedding_lookup(W, features[WORDS_FEATURE])\n      embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)\n\n    pooled_outputs = []\n    for i, filter_size in enumerate(filter_sizes):\n      with tf.name_scope(\"conv-maxpool-%s\" % filter_size):\n\n        # Convolution Layer\n        filter_shape = [filter_size, embedding_size, 1, num_filters]\n        W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name=\"W\")\n        b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name=\"b\")\n        conv = tf.nn.conv2d(\n            embedded_chars_expanded,\n            W,\n            strides=[1, 1, 
1, 1],\n            padding=\"VALID\",\n            name=\"conv\")\n        # Apply nonlinearity\n        hh = tf.nn.relu(tf.nn.bias_add(conv, b), name=\"relu\")\n\n        # Max-pooling over the outputs. Max over samples in batch and\n        # all filters.\n        pooled = tf.nn.max_pool(\n            hh,\n            ksize=[1, MAX_DOCUMENT_LENGTH - filter_size + 1, 1, 1],\n            strides=[1, 1, 1, 1],\n            padding=\"VALID\",\n            name=\"pool\")\n\n        pooled_outputs.append(pooled)\n\n    # Combine all the pooled features\n    num_filters_total = num_filters * len(filter_sizes)\n    h_pool = tf.concat(pooled_outputs, 3)\n    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])\n\n    # Add dropout in training\n    with tf.name_scope(\"dropout\"):\n      # Set dropout rate to 1 (disable dropout) by default\n      h_drop = tf.nn.dropout(h_pool_flat, 1.0)\n\n      if mode == tf.estimator.ModeKeys.TRAIN:\n        h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)\n\n    # Add a fully connected layer to do prediction\n    with tf.name_scope(\"output\"):\n      W = tf.Variable(\n          tf.truncated_normal([num_filters_total, MAX_LABEL], stddev=0.1),\n          name=\"W\")\n      b = tf.Variable(tf.constant(0.1, shape=[MAX_LABEL]), name=\"b\")\n      scores = tf.nn.xw_plus_b(h_drop, W, b, name=\"scores\")\n\n    return estimator_spec_for_softmax_classification(\n        logits=scores,\n        labels=labels,\n        mode=mode,\n        learning_rate=FLAGS.learning_rate)\n\n  return cnn_model\n\n\ndef bag_of_words_model(features, labels, mode):\n  \"\"\"A bag-of-words model using a learned word embedding.\n\n  Note it disregards the word order in the text.\n\n  Returns a tf.estimator.EstimatorSpec.\n  \"\"\"\n\n  bow_column = tf.feature_column.categorical_column_with_identity(\n      WORDS_FEATURE, num_buckets=n_words)\n\n  # The embedding values are initialized randomly, and are trained along with\n  # all other model parameters to 
minimize the training loss.\n  bow_embedding_column = tf.feature_column.embedding_column(\n      bow_column, dimension=BOW_PARAMS.EMBEDDING_SIZE)\n\n  bow = tf.feature_column.input_layer(\n      features, feature_columns=[bow_embedding_column])\n\n  logits = tf.layers.dense(bow, MAX_LABEL, activation=None)\n\n  return estimator_spec_for_softmax_classification(\n      logits=logits,\n      labels=labels,\n      mode=mode,\n      learning_rate=FLAGS.learning_rate)\n\n\ndef main(FLAGS):\n  global n_words\n\n  tf.logging.set_verbosity(tf.logging.INFO)\n\n  if FLAGS.verbose:\n    tf.logging.info(\"Running in verbose mode\")\n    tf.logging.set_verbosity(tf.logging.DEBUG)\n\n  # Load and split data\n  tf.logging.info(\"Loading data from {0}\".format(FLAGS.train_data))\n\n  data = wikidata.WikiData(\n      FLAGS.train_data,\n      FLAGS.y_class,\n      seed=DATA_SEED,\n      train_percent=TRAIN_PERCENT,\n      max_document_length=MAX_DOCUMENT_LENGTH,\n      char_ngrams=FLAGS.char_ngrams,\n      min_frequency=FLAGS.min_frequency)\n\n  n_words = len(data.vocab_processor.vocabulary_)\n  tf.logging.info(\"Total words: %d\" % n_words)\n\n  # Build model\n  if FLAGS.model == \"bag_of_words\":\n    model_fn = bag_of_words_model\n\n    # Subtract 1 because VocabularyProcessor outputs a word-id matrix where word\n    # ids start from 1 and 0 means 'no word'. 
But categorical_column_with_identity\n    # assumes 0-based count and uses -1 for missing word.\n    data.x_train = data.x_train - 1\n    data.x_test = data.x_test - 1\n  elif FLAGS.model == \"cnn\":\n    model_fn = get_cnn_model(FLAGS.embedding_size, FLAGS.num_filters,\n                             FLAGS.dropout_keep_prob)\n  else:\n    tf.logging.error(\"Unknown specified model '{}', must be one of {}\".format(\n        FLAGS.model, MODEL_LIST))\n    raise ValueError\n\n  classifier = tf.estimator.Estimator(\n      model_fn=model_fn,\n      config=tf.contrib.learn.RunConfig(\n          tf_random_seed=TRAIN_SEED,\n          ## Uncomment to see CPU/GPU allocation in logs.\n          # session_config=tf.ConfigProto(log_device_placement=True),\n      ),\n      model_dir=FLAGS.job_dir)\n\n  # Train model\n  train_input_fn = tf.estimator.inputs.numpy_input_fn(\n      x={WORDS_FEATURE: data.x_train},\n      y=data.y_train,\n      batch_size=BATCH_SIZE,\n      num_epochs=None,  # Note: For training, set this to None, so the input_fn\n      # keeps returning data until the required number of train\n      # steps is reached.\n      shuffle=True)\n  classifier.train(input_fn=train_input_fn, steps=FLAGS.train_steps)\n\n  # Predict on held-out test data\n  test_input_fn = tf.estimator.inputs.numpy_input_fn(\n      x={WORDS_FEATURE: data.x_test},\n      y=data.y_test,\n      num_epochs=1,  # Note: For evaluation and prediction set this to 1,\n      # so the input_fn will iterate over the data once and\n      # then raise OutOfRangeError\n      shuffle=False)\n  predicted_test = classifier.predict(input_fn=test_input_fn)\n  test_out = pd.DataFrame(\n      [(p[\"classes\"], p[\"probs\"][1]) for p in predicted_test],\n      columns=[\"y_predicted\", \"prob\"])\n\n  # Score with sklearn and TensorFlow\n  sklearn_score = metrics.accuracy_score(data.y_test, test_out[\"y_predicted\"])\n  tf_scores = classifier.evaluate(input_fn=test_input_fn)\n\n  train_size = len(data.x_train)\n  
test_size = len(data.x_test)\n\n  baseline = len(data.y_train[data.y_train == 0]) / len(data.y_train)\n  if baseline < .5:\n    baseline = 1 - baseline\n\n  tf.logging.info(\"\")\n  tf.logging.info(\"----------Evaluation on Held-Out Data---------\")\n  tf.logging.info(\"Train Size: {0} Test Size: {1}\".format(\n      train_size, test_size))\n  tf.logging.info(\"Baseline (class distribution): {0:f}\".format(baseline))\n  tf.logging.info(\"Accuracy (sklearn): {0:f}\".format(sklearn_score))\n\n  for key in sorted(tf_scores):\n    tf.logging.info(\"%s: %s\" % (key, tf_scores[key]))\n\n  # Export the model\n  feature_spec = {\n      WORDS_FEATURE:\n          tf.FixedLenFeature(dtype=tf.int64, shape=MAX_DOCUMENT_LENGTH)\n  }\n  serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(\n      feature_spec)\n\n  classifier.export_savedmodel(FLAGS.job_dir, serving_input_fn)\n\n\nif __name__ == \"__main__\":\n\n  parser = argparse.ArgumentParser()\n  parser.add_argument(\n      \"--verbose\", help=\"Run in verbose mode.\", action=\"store_true\")\n  parser.add_argument(\n      \"--train_data\", type=str, default=\"\", help=\"Path to the training data.\")\n  parser.add_argument(\n      \"--y_class\",\n      type=str,\n      default=\"toxic\",\n      help=\"Class to train model against, one of cnn, bag_of_words\")\n  parser.add_argument(\n      \"--model\",\n      type=str,\n      default=\"bag_of_words\",\n      help=\"The model to train, one of {}\".format(MODEL_LIST))\n  parser.add_argument(\n      \"--train_steps\",\n      type=int,\n      default=100,\n      help=\"The number of steps to train the model\")\n  parser.add_argument(\n      \"--embedding_size\",\n      type=int,\n      default=50,\n      help=\"The size of the word embedding\")\n  parser.add_argument(\n      \"--dropout_keep_prob\",\n      type=float,\n      default=0.75,\n      help=\"The dropout keep probability\")\n  parser.add_argument(\n      \"--num_filters\",\n      type=int,\n     
 default=10,\n      help=\"The number of filters in each size\")\n  parser.add_argument(\n      \"--job-dir\",\n      type=str,\n      default=\"\",\n      help=\"The directory where the job is staged\")\n  parser.add_argument(\n      \"--char_ngrams\",\n      type=int,\n      default=0,\n      help=\"Size of overlapping character ngrams to split into, use words if 0\")\n  parser.add_argument(\n      \"--learning_rate\",\n      type=float,\n      default=0.01,\n      help=\"The model learning rate\")\n  parser.add_argument(\n      \"--min_frequency\",\n      type=int,\n      default=0,\n      help=\"Minimum count for tokens passed to VocabularyProcessor\")\n\n  FLAGS = parser.parse_args()\n\n  main(FLAGS)\n"
  },
  {
    "path": "kaggle-classification/trainer/wikidata.py",
    "content": "\"\"\"Class to encapsulate training and test data.\"\"\"\n\nimport numpy as np\nimport pandas as pd\nimport tensorflow as tf\nimport tflearn\nfrom sklearn.model_selection import train_test_split\n\nY_CLASSES = [\n    'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'\n]\n\n\ndef ngrams(sentence, ngram_size):\n  \"\"\"Converts a string into a list of ngrams of characters.\n\n  ngrams('abra cadabra', 5) =\n    [('a', 'b', 'r', 'a', ' '), ('b', 'r', 'a', ' ', 'c'), ...\n     ('a', 'd', 'a', 'b', 'r'), ('d', 'a', 'b', 'r', 'a')]\n  \"\"\"\n  chars = list(sentence)\n  return zip(*[chars[i:] for i in range(ngram_size)])\n\n\nclass WikiData:\n\n  def __init__(self,\n               data_path,\n               y_class,\n               max_document_length,\n               vocab_processor_path=None,\n               test_mode=False,\n               seed=None,\n               train_percent=None,\n               char_ngrams=None,\n               min_frequency=None):\n    \"\"\"Args:\n\n      * data_path (string): path to file containing train or test data\n      * y_class (string): the class we're training or testing on\n      * vocab_processor_path (string): if provided, the comment_text data will\n      be processed with the vocab processor at that location. 
If not, a new\n      vocab_processor will be created using the training data.\n      * test_mode (boolean): true if loading data just to test on, not training\n      a model\n      * seed (integer): a random seed to use for data splitting\n      * train_percent (float): the percent of data we should use for training\n      data\n    \"\"\"\n    data = self._load_csv(data_path)\n\n    self.x_train, self.x_train_text = None, None\n    self.x_test, self.x_test_text = None, None\n    self.y_train = None\n    self.y_test = None\n    self.vocab_processor = None\n\n    # If test_mode is True, then put all the data in x_test and y_test\n    if test_mode:\n      train_percent = 0\n\n    # Split the data into test / train sets\n    self.x_train_text, self.x_test_text, self.y_train, self.y_test \\\n      = self._split(data, train_percent, 'comment_text', y_class, seed)\n\n    # Either load a VocabularyProcessor or compute one from the training data\n    if test_mode:\n\n      # If test_mode is True and no vocab_processor_path is specified, then\n      # return an error. 
We shouldn't train a VocabProcessor at test time.\n      if vocab_processor_path is None:\n        tf.logging.error(\n            'Loading data in test_mode with no vocab_processor_path')\n        raise ValueError\n\n      self.vocab_processor = self.load_vocab_processor(vocab_processor_path)\n\n    else:\n      tokenizer_fn = None\n      if char_ngrams:\n        tokenizer_fn = lambda iterator: (\n            ngrams(x, char_ngrams) for x in iterator)\n      self.vocab_processor = tflearn.data_utils.VocabularyProcessor(\n          max_document_length=max_document_length,\n          min_frequency=min_frequency,\n          tokenizer_fn=tokenizer_fn)\n      self.x_train = np.array(\n          list(self.vocab_processor.fit_transform(self.x_train_text)))\n\n    # Apply the VocabularyProcessor to the test data\n    self.x_test = np.array(\n        list(self.vocab_processor.transform(self.x_test_text)))\n\n  def _load_vocab_processor(self, path):\n    \"\"\"Load a VocabularyProcessor from the provided path\"\"\"\n    return tflearn.data_utils.VocabularyProcessor.restore(path)\n\n  def _load_csv(self, path):\n    \"\"\"Reads CSV from specified location and returns the data as a Pandas Dataframe. Will work with a Cloud Storage path, e.g.\n\n    'gs://<bucket>/<blob>' or a local path.\n\n    Assumes data can fit into memory.\n    \"\"\"\n    with tf.gfile.Open(path, 'rb') as fileobj:\n      df = pd.read_csv(fileobj, encoding='utf-8')\n\n    return df\n\n  def _split(self, data, train_percent, x_field, y_class, seed=None):\n    \"\"\"Split divides the Wikipedia data into test and train subsets.\n\n    Args:\n      * data (dataframe): a dataframe with data for 'comment_text' and y_class\n      * train_percent (float): the fraction of data to use for training\n      * x_field (string): attribute of the wiki data to use to train, e.g.\n        'comment_text'\n      * y_class (string): attribute of the wiki data to predict, e.g. 
'toxic'\n      * seed (integer): a seed to use to split the data in a reproducible way\n\n    Returns:\n      x_train (series): a pandas series with the text from each train example\n      x_test (series): a pandas series with the text from each test example\n      y_train (array): numpy array of the 0 or 1 labels for the training data\n      y_test (array): numpy array of the 0 or 1 labels for the test data\n    \"\"\"\n\n    if y_class not in Y_CLASSES:\n      tf.logging.error('Specified y_class {0} not in list of possible classes {1}'\\\n            .format(y_class, Y_CLASSES))\n      raise ValueError\n\n    if train_percent > 1 or train_percent < 0:\n      tf.logging.error('Specified train_percent {0} is not between 0 and 1'\\\n            .format(train_percent))\n      raise ValueError\n\n    X = data[x_field]\n    y = data[y_class]\n    x_train, x_test, y_train, y_test = train_test_split(\n        X, y, test_size=1 - train_percent, random_state=seed)\n\n    return x_train, x_test, np.array(y_train), np.array(y_test)\n"
  },
  {
    "path": "model_evaluation/BiosBias Evaluation.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Imports\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from __future__ import absolute_import\\n\",\n    \"from __future__ import division\\n\",\n    \"from __future__ import print_function\\n\",\n    \"\\n\",\n    \"from IPython.display import display\\n\",\n    \"import json\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas as pd\\n\",\n    \"import os\\n\",\n    \"import random\\n\",\n    \"import re\\n\",\n    \"import seaborn as sns\\n\",\n    \"import matplotlib.pyplot as plt\\n\",\n    \"import sklearn.metrics as metrics\\n\",\n    \"import tensorflow as tf\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Read scored test data\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"standard_data_path = 'gs://conversationai-models/biosbias/scored_data/test_standard_0409.csv'\\n\",\n    \"scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_scrubbed_0409.csv'\\n\",\n    \"very_scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_very_scrubbed_0409.csv'\\n\",\n    \"gender_data_path = 'gs://conversationai-models/biosbias/scored_data/test_data_gender.csv'\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"perf_df = pd.read_csv(tf.gfile.Open(standard_data_path)).drop_duplicates(subset=['tokens'])\\n\",\n    \"scrubbed_df = pd.read_csv(tf.gfile.Open(scrubbed_data_path)).drop_duplicates(subset=['tokens'])\\n\",\n    \"very_scrubbed_df = pd.read_csv(tf.gfile.Open(very_scrubbed_data_path)).drop_duplicates(subset=['tokens'])\\n\",\n    \"gender_df = pd.read_csv(tf.gfile.Open(gender_data_path)).drop_duplicates(subset=['tokens'])\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"(59824, 300)\\n\",\n      \"(59820, 36)\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print(perf_df.shape)\\n\",\n    \"print(scrubbed_df.shape)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = perf_df.join(scrubbed_df, rsuffix = '_scrubbed')\\n\",\n    \"df = df.join(very_scrubbed_df, rsuffix = '_very_scrubbed')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>tokens</th>\\n\",\n       \"      <th>gender</th>\\n\",\n       \"      <th>label</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3</th>\\n\",\n       \"      
<th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6</th>\\n\",\n       \"      <th>...</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>[u'he', u'is', u'currently', u'working', u'clo...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.000008</td>\\n\",\n       \"      <td>4.625991e-14</td>\\n\",\n       \"      <td>0.000089</td>\\n\",\n       \"      <td>0.000432</td>\\n\",\n       \"      <td>2.642943e-04</td>\\n\",\n       \"      <td>1.613340e-07</td>\\n\",\n       \"      <td>4.687537e-07</td>\\n\",\n       \"      
<td>...</td>\\n\",\n       \"      <td>0.001929</td>\\n\",\n       \"      <td>1.914383e-06</td>\\n\",\n       \"      <td>0.000097</td>\\n\",\n       \"      <td>0.000332</td>\\n\",\n       \"      <td>7.086468e-07</td>\\n\",\n       \"      <td>8.798547e-16</td>\\n\",\n       \"      <td>0.000041</td>\\n\",\n       \"      <td>0.000395</td>\\n\",\n       \"      <td>0.000054</td>\\n\",\n       \"      <td>8.315536e-08</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>[u'she', u'has', u'a', u'passion', u'for', u'w...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>26</td>\\n\",\n       \"      <td>0.000001</td>\\n\",\n       \"      <td>5.970340e-18</td>\\n\",\n       \"      <td>0.000004</td>\\n\",\n       \"      <td>0.000155</td>\\n\",\n       \"      <td>8.439872e-06</td>\\n\",\n       \"      <td>1.380430e-07</td>\\n\",\n       \"      <td>8.653511e-09</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.013356</td>\\n\",\n       \"      <td>7.866625e-01</td>\\n\",\n       \"      <td>0.009269</td>\\n\",\n       \"      <td>0.024264</td>\\n\",\n       \"      <td>3.710595e-04</td>\\n\",\n       \"      <td>2.425320e-11</td>\\n\",\n       \"      <td>0.004488</td>\\n\",\n       \"      <td>0.002426</td>\\n\",\n       \"      <td>0.032467</td>\\n\",\n       \"      <td>1.274749e-04</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>[u'growing', u'up', u'under', u'the', u'influe...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>22</td>\\n\",\n       \"      <td>0.000205</td>\\n\",\n       \"      <td>1.023775e-15</td>\\n\",\n       \"      <td>0.008020</td>\\n\",\n       \"      <td>0.000054</td>\\n\",\n       \"      <td>1.159827e-06</td>\\n\",\n       \"      <td>2.420847e-06</td>\\n\",\n       \"      <td>4.043094e-06</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      
<td>0.000135</td>\\n\",\n       \"      <td>8.046401e-04</td>\\n\",\n       \"      <td>0.002173</td>\\n\",\n       \"      <td>0.000697</td>\\n\",\n       \"      <td>3.003297e-05</td>\\n\",\n       \"      <td>8.979249e-14</td>\\n\",\n       \"      <td>0.001901</td>\\n\",\n       \"      <td>0.000097</td>\\n\",\n       \"      <td>0.001727</td>\\n\",\n       \"      <td>4.318769e-06</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>[u'he', u'earned', u'his', u'beng', u'degree',...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.000009</td>\\n\",\n       \"      <td>1.354895e-13</td>\\n\",\n       \"      <td>0.001508</td>\\n\",\n       \"      <td>0.000051</td>\\n\",\n       \"      <td>1.071294e-07</td>\\n\",\n       \"      <td>1.333064e-08</td>\\n\",\n       \"      <td>1.857020e-05</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.009217</td>\\n\",\n       \"      <td>1.700057e-02</td>\\n\",\n       \"      <td>0.136035</td>\\n\",\n       \"      <td>0.009581</td>\\n\",\n       \"      <td>2.460610e-03</td>\\n\",\n       \"      <td>1.396903e-09</td>\\n\",\n       \"      <td>0.002276</td>\\n\",\n       \"      <td>0.009811</td>\\n\",\n       \"      <td>0.026841</td>\\n\",\n       \"      <td>1.840305e-04</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>[u'her', u'professional', u'and', u'educationa...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.001034</td>\\n\",\n       \"      <td>6.887217e-12</td>\\n\",\n       \"      <td>0.000701</td>\\n\",\n       \"      <td>0.021189</td>\\n\",\n       \"      <td>1.852501e-03</td>\\n\",\n       \"      <td>6.723991e-05</td>\\n\",\n       \"      <td>7.880444e-06</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000425</td>\\n\",\n       \"      
<td>9.174340e-08</td>\\n\",\n       \"      <td>0.995151</td>\\n\",\n       \"      <td>0.001635</td>\\n\",\n       \"      <td>9.952086e-11</td>\\n\",\n       \"      <td>4.422046e-14</td>\\n\",\n       \"      <td>0.000974</td>\\n\",\n       \"      <td>0.000039</td>\\n\",\n       \"      <td>0.000482</td>\\n\",\n       \"      <td>1.483144e-07</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"<p>5 rows × 372 columns</p>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"                                              tokens gender  label  \\\\\\n\",\n       \"0  [u'he', u'is', u'currently', u'working', u'clo...      M     25   \\n\",\n       \"1  [u'she', u'has', u'a', u'passion', u'for', u'w...      F     26   \\n\",\n       \"2  [u'growing', u'up', u'under', u'the', u'influe...      M     22   \\n\",\n       \"3  [u'he', u'earned', u'his', u'beng', u'degree',...      M     25   \\n\",\n       \"4  [u'her', u'professional', u'and', u'educationa...      
F     25   \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0  \\\\\\n\",\n       \"0                                           0.000008                           \\n\",\n       \"1                                           0.000001                           \\n\",\n       \"2                                           0.000205                           \\n\",\n       \"3                                           0.000009                           \\n\",\n       \"4                                           0.001034                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1  \\\\\\n\",\n       \"0                                       4.625991e-14                           \\n\",\n       \"1                                       5.970340e-18                           \\n\",\n       \"2                                       1.023775e-15                           \\n\",\n       \"3                                       1.354895e-13                           \\n\",\n       \"4                                       6.887217e-12                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2  \\\\\\n\",\n       \"0                                           0.000089                           \\n\",\n       \"1                                           0.000004                           \\n\",\n       \"2                                           0.008020                           \\n\",\n       \"3                                           0.001508                           \\n\",\n       \"4                                           0.000701                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3  \\\\\\n\",\n       \"0                                           0.000432                
           \\n\",\n       \"1                                           0.000155                           \\n\",\n       \"2                                           0.000054                           \\n\",\n       \"3                                           0.000051                           \\n\",\n       \"4                                           0.021189                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4  \\\\\\n\",\n       \"0                                       2.642943e-04                           \\n\",\n       \"1                                       8.439872e-06                           \\n\",\n       \"2                                       1.159827e-06                           \\n\",\n       \"3                                       1.071294e-07                           \\n\",\n       \"4                                       1.852501e-03                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5  \\\\\\n\",\n       \"0                                       1.613340e-07                           \\n\",\n       \"1                                       1.380430e-07                           \\n\",\n       \"2                                       2.420847e-06                           \\n\",\n       \"3                                       1.333064e-08                           \\n\",\n       \"4                                       6.723991e-05                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6  \\\\\\n\",\n       \"0                                       4.687537e-07                           \\n\",\n       \"1                                       8.653511e-09                           \\n\",\n       \"2                                       4.043094e-06                           \\n\",\n 
      \"3                                       1.857020e-05                           \\n\",\n       \"4                                       7.880444e-06                           \\n\",\n       \"\\n\",\n       \"                                      ...                                      \\\\\\n\",\n       \"0                                     ...                                       \\n\",\n       \"1                                     ...                                       \\n\",\n       \"2                                     ...                                       \\n\",\n       \"3                                     ...                                       \\n\",\n       \"4                                     ...                                       \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23  \\\\\\n\",\n       \"0                                           0.001929                            \\n\",\n       \"1                                           0.013356                            \\n\",\n       \"2                                           0.000135                            \\n\",\n       \"3                                           0.009217                            \\n\",\n       \"4                                           0.000425                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24  \\\\\\n\",\n       \"0                                       1.914383e-06                            \\n\",\n       \"1                                       7.866625e-01                            \\n\",\n       \"2                                       8.046401e-04                            \\n\",\n       \"3                                       1.700057e-02                            \\n\",\n       \"4                                       9.174340e-08                            \\n\",\n   
    \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25  \\\\\\n\",\n       \"0                                           0.000097                            \\n\",\n       \"1                                           0.009269                            \\n\",\n       \"2                                           0.002173                            \\n\",\n       \"3                                           0.136035                            \\n\",\n       \"4                                           0.995151                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26  \\\\\\n\",\n       \"0                                           0.000332                            \\n\",\n       \"1                                           0.024264                            \\n\",\n       \"2                                           0.000697                            \\n\",\n       \"3                                           0.009581                            \\n\",\n       \"4                                           0.001635                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27  \\\\\\n\",\n       \"0                                       7.086468e-07                            \\n\",\n       \"1                                       3.710595e-04                            \\n\",\n       \"2                                       3.003297e-05                            \\n\",\n       \"3                                       2.460610e-03                            \\n\",\n       \"4                                       9.952086e-11                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28  \\\\\\n\",\n       \"0                                       8.798547e-16                   
         \\n\",\n       \"1                                       2.425320e-11                            \\n\",\n       \"2                                       8.979249e-14                            \\n\",\n       \"3                                       1.396903e-09                            \\n\",\n       \"4                                       4.422046e-14                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29  \\\\\\n\",\n       \"0                                           0.000041                            \\n\",\n       \"1                                           0.004488                            \\n\",\n       \"2                                           0.001901                            \\n\",\n       \"3                                           0.002276                            \\n\",\n       \"4                                           0.000974                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30  \\\\\\n\",\n       \"0                                           0.000395                            \\n\",\n       \"1                                           0.002426                            \\n\",\n       \"2                                           0.000097                            \\n\",\n       \"3                                           0.009811                            \\n\",\n       \"4                                           0.000039                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31  \\\\\\n\",\n       \"0                                           0.000054                            \\n\",\n       \"1                                           0.032467                            \\n\",\n       \"2                                           0.001727                   
         \\n\",\n       \"3                                           0.026841                            \\n\",\n       \"4                                           0.000482                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32  \\n\",\n       \"0                                       8.315536e-08                           \\n\",\n       \"1                                       1.274749e-04                           \\n\",\n       \"2                                       4.318769e-06                           \\n\",\n       \"3                                       1.840305e-04                           \\n\",\n       \"4                                       1.483144e-07                           \\n\",\n       \"\\n\",\n       \"[5 rows x 372 columns]\"\n      ]\n     },\n     \"execution_count\": 5,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"(59824, 372)\"\n      ]\n     },\n     \"execution_count\": 6,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.shape\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"(59753, 372)\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"df = df.dropna()\\n\",\n    \"print(df.shape)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Preprocessing\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def 
get_class_from_col_name(col_name):\\n\",\n    \"    #print(col_name)\\n\",\n    \"    pattern = r'^.*_(\\\\d+)$'\\n\",\n    \"    return int(re.search(pattern, col_name).group(1))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 10,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def find_best_class(df, model_name, class_names):\\n\",\n    \"    model_class_names = ['{}_{}'.format(model_name, class_name) for class_name in class_names]\\n\",\n    \"    sub_df = df[model_class_names]\\n\",\n    \"    df['{}_class'.format(model_name)] = sub_df.idxmax(axis=1).apply(get_class_from_col_name)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"metadata\": {\n    \"collapsed\": true\n   },\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"array(['tokens', 'gender', 'label',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_10',\\n\",\n       \"   
    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_29',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_16',\\n\",\n      
 \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_2',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_22',\\n\",\n   
    \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_8',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_27',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_14',\\n\",\n      
 \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_0',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_20',\\n\",\n     
  \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_6',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_25',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_12',\\n\",\n      
 \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_31',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_32',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_18',\\n\",\n      
 \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_32',\\n\",\n       \"       'tokens_scrubbed', 'gender_scrubbed', 'label_scrubbed',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_4',\\n\",\n       \"    
   'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_8',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_23',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_27',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_32',\\n\",\n       \"       'tokens_very_scrubbed', 'gender_very_scrubbed',\\n\",\n       \"       'label_very_scrubbed',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_0',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_1',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_2',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_3',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_4',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_5',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_6',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_7',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_8',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_9',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_10',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_11',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_12',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_13',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_14',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_15',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_16',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_17',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_18',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_19',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_20',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_21',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_22',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27',\\n\",\n       \"       
'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31',\\n\",\n       \"       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32'],\\n\",\n       \"      dtype=object)\"\n      ]\n     },\n     \"execution_count\": 8,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# Can check model names here\\n\",\n    \"# df.columns.values\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 11,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# May have to change.\\n\",\n    \"# Can look them up in experiment tracker.\\n\",\n    \"MODEL_NAMES = {\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837': 'debiased_tolga',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941': 'debiased_biosbias',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003': 'strong_debiased_1',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019': 'strong_debiased_2',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034': 'strong_debiased_3',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055': 'strong_debiased_4',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117': 'glove',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113': 'strong_no_equalize',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131': 'strong_no_projection', 
\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954': 'scrubbed',\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254': 'very_scrubbed'\\n\",\n    \"}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 12,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"CLASS_NAMES = range(33)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 13,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"for _model in MODEL_NAMES:\\n\",\n    \"    find_best_class(df, _model, CLASS_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 14,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Labels with either gender having too few examples\\n\",\n    \"bad_labels = df.groupby('label').gender.value_counts().reset_index(name = 'count').query('count < 5').label.values\\n\",\n    \"assert len(bad_labels) == 0\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Accuracy Calculation\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 15,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Accuracy for model debiased_biosbias: 0.806972034877\\n\",\n      \"Accuracy for model very_scrubbed: 0.355915184175\\n\",\n      \"Accuracy for model debiased_tolga: 0.818921225713\\n\",\n      \"Accuracy for model strong_debiased_1: 0.817984034274\\n\",\n      \"Accuracy for model strong_no_projection: 0.806687530333\\n\",\n      \"Accuracy for model strong_debiased_2: 0.81733134738\\n\",\n      \"Accuracy for model strong_no_equalize: 0.815239402206\\n\",\n      \"Accuracy for model glove: 0.817950563152\\n\",\n      \"Accuracy for model strong_debiased_4: 0.814737335364\\n\",\n      \"Accuracy for model strong_debiased_3: 0.817599116362\\n\",\n      
\"Accuracy for model scrubbed: 0.130503907754\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"accuracy_list = []\\n\",\n    \"for _model in MODEL_NAMES:\\n\",\n    \"    is_correct = (df['{}_class'.format(_model)] == df['label'])\\n\",\n    \"    _acc = sum(is_correct)/len(is_correct)\\n\",\n    \"    accuracy_list.append(_acc)\\n\",\n    \"    print ('Accuracy for model {}: {}'.format(MODEL_NAMES[_model], _acc))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Fairness Metrics\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 16,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"for _class in CLASS_NAMES:\\n\",\n    \"    df['label_{}'.format(_class)] = (df['label'] == _class)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 17,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Gender ratios of classes\\n\",\n    \"gender_counts = df.groupby('label').gender.value_counts().reset_index(name = 'count')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 18,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def frac_female(df):\\n\",\n    \"    m_count = df[df['gender'] == \\\"M\\\"]['count'].values[0]\\n\",\n    \"    f_count = df[df['gender'] == \\\"F\\\"]['count'].values[0]\\n\",\n    \"    return {'label': df['label'].values[0], 'frac_female': f_count/(m_count+f_count)}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 19,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"frac_female_df = pd.DataFrame(list(gender_counts.groupby('label', as_index = False).apply(frac_female)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 20,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def compute_tpr(df, _class, _model, threshold = 0.5):\\n\",\n    \"    tpr = 
metrics.recall_score(df['label_{}'.format(_class)],\\n\",\n    \"                               df['{}_{}'.format(_model,_class)] > threshold)\\n\",\n    \"    return tpr\\n\",\n    \"    \\n\",\n    \"def compute_tpr_by_gender(df, _class, _model, threshold = 0.5):\\n\",\n    \"    tpr_m = compute_tpr(df.query('gender == \\\"M\\\"'), _class, _model, threshold)\\n\",\n    \"    tpr_f = compute_tpr(df.query('gender == \\\"F\\\"'), _class, _model, threshold)\\n\",\n    \"    return {'M': tpr_m, 'F': tpr_f}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 21,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def compute_tpr_tnr(df, _class, _model, threshold = 0.5):\\n\",\n    \"    #cm = metrics.confusion_matrix(df['label_{}'.format(_class)],\\n\",\n    \"    #                              df['{}_{}'.format(_model,_class)] > threshold)\\n\",\n    \"    cm = pd.crosstab(df['label_{}'.format(_class)], df['{}_{}'.format(_model,_class)] > threshold)\\n\",\n    \"    #display(cm)\\n\",\n    \"    if cm.shape[0] > 1:\\n\",\n    \"        tn = cm.iloc[0,0]\\n\",\n    \"        fp = cm.iloc[0,1]\\n\",\n    \"        fn = cm.iloc[1,0]\\n\",\n    \"        tp = cm.iloc[1,1]\\n\",\n    \"        tpr = tp/(tp+fn)\\n\",\n    \"        tnr = tn/(tn+fp)\\n\",\n    \"    else:\\n\",\n    \"        tpr = 0\\n\",\n    \"        tnr = 1\\n\",\n    \"    return tpr, tnr\\n\",\n    \"\\n\",\n    \"def compute_tr_by_gender(df, _class, _model, threshold = 0.5):\\n\",\n    \"    tpr_m, tnr_m = compute_tpr_tnr(df.query('gender == \\\"M\\\"'), _class, _model, threshold)\\n\",\n    \"    tpr_f, tnr_f = compute_tpr_tnr(df.query('gender == \\\"F\\\"'), _class, _model, threshold)\\n\",\n    \"    return {'TPR_m': tpr_m, 'TPR_f': tpr_f, 'TNR_m': tnr_m, 'TNR_f': tnr_f}\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 22,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"for _class in CLASS_NAMES:\\n\",\n    \"    for 
_model in MODEL_NAMES:\\n\",\n    \"        tpr_1 = compute_tpr(df, _class, _model)\\n\",\n    \"        tpr_2, _ = compute_tpr_tnr(df, _class, _model)\\n\",\n    \"        assert tpr_1 == tpr_2, '{} != {}'.format(tpr_1, tpr_2)\\n\",\n    \"        #print('{} == {}'.format(tpr_1, tpr_2))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 23,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tpr_df = pd.DataFrame()\\n\",\n    \"for _class in frac_female_df.label:\\n\",\n    \"    row = {}\\n\",\n    \"    row['label'] = _class\\n\",\n    \"    for _model, _model_type in MODEL_NAMES.items():\\n\",\n    \"        tpr, tnr = compute_tpr_tnr(df, _class, _model)\\n\",\n    \"        row['{}_tpr'.format(_model_type)] = tpr\\n\",\n    \"        row['{}_tnr'.format(_model_type)] = tnr\\n\",\n    \"        gender_trs = compute_tr_by_gender(df, _class, _model)\\n\",\n    \"        row['{}_tpr_F'.format(_model_type)] = gender_trs['TPR_f']\\n\",\n    \"        row['{}_tpr_M'.format(_model_type)] = gender_trs['TPR_m']\\n\",\n    \"        row['{}_tpr_gender_gap'.format(_model_type)] = gender_trs['TPR_f'] - gender_trs['TPR_m']\\n\",\n    \"        row['{}_tnr_F'.format(_model_type)] = gender_trs['TNR_f']\\n\",\n    \"        row['{}_tnr_M'.format(_model_type)] = gender_trs['TNR_m']\\n\",\n    \"        row['{}_tnr_gender_gap'.format(_model_type)] = gender_trs['TNR_f'] - gender_trs['TNR_m']\\n\",\n    \"    tpr_df = tpr_df.append(row, ignore_index = True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 24,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"results_df = pd.merge(tpr_df, frac_female_df, on = 'label')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 25,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"TITLE_LABELS = [\\n\",\n    \"    'accountant', 'acupuncturist', 'architect', 'attorney', 'chiropractor', 'comedian', 'composer', 
'dentist',\\n\",\n    \"    'dietitian', 'dj', 'filmmaker', 'interior_designer', 'journalist', 'landscape_architect', 'magician',\\n\",\n    \"    'massage_therapist', 'model', 'nurse', 'painter', 'paralegal', 'pastor', 'personal_trainer',\\n\",\n    \"    'photographer', 'physician', 'poet', 'professor', 'psychologist', 'rapper',\\n\",\n    \"    'real_estate_broker', 'software_engineer', 'surgeon', 'teacher', 'yoga_teacher']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 26,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"results_df['label_profession'] = results_df['label'].apply(lambda x: TITLE_LABELS[int(x)])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 27,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>frac_female</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>frac_female</th>\\n\",\n       \"      <td>1.000000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>debiased_biosbias_tpr_gender_gap</th>\\n\",\n       \"      <td>0.829982</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>very_scrubbed_tpr_gender_gap</th>\\n\",\n       \"      
<td>0.458378</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>debiased_tolga_tpr_gender_gap</th>\\n\",\n       \"      <td>0.824882</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_1_tpr_gender_gap</th>\\n\",\n       \"      <td>0.716922</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_no_projection_tpr_gender_gap</th>\\n\",\n       \"      <td>0.709000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_2_tpr_gender_gap</th>\\n\",\n       \"      <td>0.596896</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_no_equalize_tpr_gender_gap</th>\\n\",\n       \"      <td>0.772645</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>glove_tpr_gender_gap</th>\\n\",\n       \"      <td>0.794059</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_4_tpr_gender_gap</th>\\n\",\n       \"      <td>0.550435</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_3_tpr_gender_gap</th>\\n\",\n       \"      <td>0.707174</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>scrubbed_tpr_gender_gap</th>\\n\",\n       \"      <td>-0.282919</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"                                     frac_female\\n\",\n       \"frac_female                             1.000000\\n\",\n       \"debiased_biosbias_tpr_gender_gap        0.829982\\n\",\n       \"very_scrubbed_tpr_gender_gap            0.458378\\n\",\n       \"debiased_tolga_tpr_gender_gap           0.824882\\n\",\n       \"strong_debiased_1_tpr_gender_gap        0.716922\\n\",\n       \"strong_no_projection_tpr_gender_gap     0.709000\\n\",\n       
\"strong_debiased_2_tpr_gender_gap        0.596896\\n\",\n       \"strong_no_equalize_tpr_gender_gap       0.772645\\n\",\n       \"glove_tpr_gender_gap                    0.794059\\n\",\n       \"strong_debiased_4_tpr_gender_gap        0.550435\\n\",\n       \"strong_debiased_3_tpr_gender_gap        0.707174\\n\",\n       \"scrubbed_tpr_gender_gap                -0.282919\"\n      ]\n     },\n     \"execution_count\": 27,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"results_df[['frac_female']+['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]].corr()[['frac_female']]\\n\",\n    \"    \"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 28,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tpr_gender_gap_cols = ['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\\n\",\n    \"tnr_gender_gap_cols = ['{}_tnr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 29,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"gender_gap_df = results_df[['label_profession', 'frac_female']+tpr_gender_gap_cols+tnr_gender_gap_cols]\\n\",\n    \"#gender_gap_df.columns = ['label_profession', 'frac_female']+['{}'.format(_model) for _model in MODEL_NAMES.values()]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 30,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    
}\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>label_profession</th>\\n\",\n       \"      <th>frac_female</th>\\n\",\n       \"      <th>debiased_biosbias_tpr_gender_gap</th>\\n\",\n       \"      <th>very_scrubbed_tpr_gender_gap</th>\\n\",\n       \"      <th>debiased_tolga_tpr_gender_gap</th>\\n\",\n       \"      <th>strong_debiased_1_tpr_gender_gap</th>\\n\",\n       \"      <th>strong_no_projection_tpr_gender_gap</th>\\n\",\n       \"      <th>strong_debiased_2_tpr_gender_gap</th>\\n\",\n       \"      <th>strong_no_equalize_tpr_gender_gap</th>\\n\",\n       \"      <th>glove_tpr_gender_gap</th>\\n\",\n       \"      <th>...</th>\\n\",\n       \"      <th>very_scrubbed_tnr_gender_gap</th>\\n\",\n       \"      <th>debiased_tolga_tnr_gender_gap</th>\\n\",\n       \"      <th>strong_debiased_1_tnr_gender_gap</th>\\n\",\n       \"      <th>strong_no_projection_tnr_gender_gap</th>\\n\",\n       \"      <th>strong_debiased_2_tnr_gender_gap</th>\\n\",\n       \"      <th>strong_no_equalize_tnr_gender_gap</th>\\n\",\n       \"      <th>glove_tnr_gender_gap</th>\\n\",\n       \"      <th>strong_debiased_4_tnr_gender_gap</th>\\n\",\n       \"      <th>strong_debiased_3_tnr_gender_gap</th>\\n\",\n       \"      <th>scrubbed_tnr_gender_gap</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>dietitian</td>\\n\",\n       \"      <td>0.920437</td>\\n\",\n       \"      <td>0.290927</td>\\n\",\n       \"      <td>0.173878</td>\\n\",\n       \"      <td>0.297707</td>\\n\",\n       \"      <td>0.199900</td>\\n\",\n       \"      <td>0.223862</td>\\n\",\n       \"      <td>0.187072</td>\\n\",\n       \"      <td>0.250980</td>\\n\",\n       \"      <td>0.232835</td>\\n\",\n       
\"      <td>...</td>\\n\",\n       \"      <td>-0.000674</td>\\n\",\n       \"      <td>-0.002692</td>\\n\",\n       \"      <td>-0.001220</td>\\n\",\n       \"      <td>-0.001617</td>\\n\",\n       \"      <td>-0.001530</td>\\n\",\n       \"      <td>-0.002213</td>\\n\",\n       \"      <td>-0.002810</td>\\n\",\n       \"      <td>-0.001630</td>\\n\",\n       \"      <td>-0.001412</td>\\n\",\n       \"      <td>-0.000266</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>13</th>\\n\",\n       \"      <td>nurse</td>\\n\",\n       \"      <td>0.914502</td>\\n\",\n       \"      <td>0.082735</td>\\n\",\n       \"      <td>0.013742</td>\\n\",\n       \"      <td>0.085377</td>\\n\",\n       \"      <td>0.048740</td>\\n\",\n       \"      <td>0.033271</td>\\n\",\n       \"      <td>0.025981</td>\\n\",\n       \"      <td>0.057404</td>\\n\",\n       \"      <td>0.082411</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.001686</td>\\n\",\n       \"      <td>-0.007627</td>\\n\",\n       \"      <td>-0.007427</td>\\n\",\n       \"      <td>-0.004478</td>\\n\",\n       \"      <td>-0.004807</td>\\n\",\n       \"      <td>-0.004455</td>\\n\",\n       \"      <td>-0.005866</td>\\n\",\n       \"      <td>-0.002840</td>\\n\",\n       \"      <td>-0.002707</td>\\n\",\n       \"      <td>-0.001573</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>15</th>\\n\",\n       \"      <td>paralegal</td>\\n\",\n       \"      <td>0.866109</td>\\n\",\n       \"      <td>0.375755</td>\\n\",\n       \"      <td>0.094656</td>\\n\",\n       \"      <td>0.317482</td>\\n\",\n       \"      <td>0.262077</td>\\n\",\n       \"      <td>0.256944</td>\\n\",\n       \"      <td>0.271437</td>\\n\",\n       \"      <td>0.314915</td>\\n\",\n       \"      <td>0.271437</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000247</td>\\n\",\n       \"      <td>-0.000103</td>\\n\",\n       \"      
<td>0.000095</td>\\n\",\n       \"      <td>-0.000012</td>\\n\",\n       \"      <td>0.000075</td>\\n\",\n       \"      <td>-0.000219</td>\\n\",\n       \"      <td>-0.000164</td>\\n\",\n       \"      <td>-0.000060</td>\\n\",\n       \"      <td>-0.000236</td>\\n\",\n       \"      <td>0.000195</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>27</th>\\n\",\n       \"      <td>yoga_teacher</td>\\n\",\n       \"      <td>0.858696</td>\\n\",\n       \"      <td>0.276534</td>\\n\",\n       \"      <td>0.005518</td>\\n\",\n       \"      <td>0.143784</td>\\n\",\n       \"      <td>0.208049</td>\\n\",\n       \"      <td>0.116196</td>\\n\",\n       \"      <td>0.195067</td>\\n\",\n       \"      <td>0.161636</td>\\n\",\n       \"      <td>0.208374</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000535</td>\\n\",\n       \"      <td>-0.001455</td>\\n\",\n       \"      <td>-0.001289</td>\\n\",\n       \"      <td>-0.000758</td>\\n\",\n       \"      <td>-0.001393</td>\\n\",\n       \"      <td>-0.001211</td>\\n\",\n       \"      <td>-0.001211</td>\\n\",\n       \"      <td>-0.001081</td>\\n\",\n       \"      <td>-0.001399</td>\\n\",\n       \"      <td>0.000299</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>12</th>\\n\",\n       \"      <td>model</td>\\n\",\n       \"      <td>0.818988</td>\\n\",\n       \"      <td>0.480652</td>\\n\",\n       \"      <td>0.176120</td>\\n\",\n       \"      <td>0.544309</td>\\n\",\n       \"      <td>0.418456</td>\\n\",\n       \"      <td>0.460211</td>\\n\",\n       \"      <td>0.455824</td>\\n\",\n       \"      <td>0.532551</td>\\n\",\n       \"      <td>0.505093</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.001022</td>\\n\",\n       \"      <td>-0.000566</td>\\n\",\n       \"      <td>0.000379</td>\\n\",\n       \"      <td>0.000429</td>\\n\",\n       \"      <td>-0.000039</td>\\n\",\n       \"      
<td>-0.000513</td>\\n\",\n       \"      <td>-0.001008</td>\\n\",\n       \"      <td>0.000249</td>\\n\",\n       \"      <td>0.000181</td>\\n\",\n       \"      <td>0.001161</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>interior_designer</td>\\n\",\n       \"      <td>0.782609</td>\\n\",\n       \"      <td>0.182716</td>\\n\",\n       \"      <td>-0.013580</td>\\n\",\n       \"      <td>0.243210</td>\\n\",\n       \"      <td>0.081481</td>\\n\",\n       \"      <td>0.096296</td>\\n\",\n       \"      <td>0.041975</td>\\n\",\n       \"      <td>0.224691</td>\\n\",\n       \"      <td>0.270370</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000226</td>\\n\",\n       \"      <td>-0.000884</td>\\n\",\n       \"      <td>0.000032</td>\\n\",\n       \"      <td>-0.000023</td>\\n\",\n       \"      <td>0.000024</td>\\n\",\n       \"      <td>-0.000676</td>\\n\",\n       \"      <td>-0.000201</td>\\n\",\n       \"      <td>0.000216</td>\\n\",\n       \"      <td>-0.000013</td>\\n\",\n       \"      <td>0.000407</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>22</th>\\n\",\n       \"      <td>psychologist</td>\\n\",\n       \"      <td>0.620751</td>\\n\",\n       \"      <td>0.000799</td>\\n\",\n       \"      <td>0.008890</td>\\n\",\n       \"      <td>0.045876</td>\\n\",\n       \"      <td>0.043524</td>\\n\",\n       \"      <td>0.045169</td>\\n\",\n       \"      <td>0.020219</td>\\n\",\n       \"      <td>0.042056</td>\\n\",\n       \"      <td>0.017593</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.000742</td>\\n\",\n       \"      <td>-0.005913</td>\\n\",\n       \"      <td>-0.004535</td>\\n\",\n       \"      <td>-0.002672</td>\\n\",\n       \"      <td>-0.002096</td>\\n\",\n       \"      <td>-0.004275</td>\\n\",\n       \"      <td>-0.002278</td>\\n\",\n       \"      <td>-0.003760</td>\\n\",\n       \"      
<td>-0.002820</td>\\n\",\n       \"      <td>-0.001450</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>26</th>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>0.604382</td>\\n\",\n       \"      <td>0.111221</td>\\n\",\n       \"      <td>0.025352</td>\\n\",\n       \"      <td>0.129299</td>\\n\",\n       \"      <td>0.111760</td>\\n\",\n       \"      <td>0.113756</td>\\n\",\n       \"      <td>0.114246</td>\\n\",\n       \"      <td>0.119168</td>\\n\",\n       \"      <td>0.137121</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.000813</td>\\n\",\n       \"      <td>-0.004694</td>\\n\",\n       \"      <td>-0.002497</td>\\n\",\n       \"      <td>-0.004570</td>\\n\",\n       \"      <td>-0.001141</td>\\n\",\n       \"      <td>-0.002609</td>\\n\",\n       \"      <td>-0.002664</td>\\n\",\n       \"      <td>-0.002461</td>\\n\",\n       \"      <td>-0.001785</td>\\n\",\n       \"      <td>0.000671</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>journalist</td>\\n\",\n       \"      <td>0.492152</td>\\n\",\n       \"      <td>0.019865</td>\\n\",\n       \"      <td>0.010182</td>\\n\",\n       \"      <td>0.057554</td>\\n\",\n       \"      <td>0.021920</td>\\n\",\n       \"      <td>0.001790</td>\\n\",\n       \"      <td>0.013070</td>\\n\",\n       \"      <td>0.042923</td>\\n\",\n       \"      <td>0.058686</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000762</td>\\n\",\n       \"      <td>-0.000167</td>\\n\",\n       \"      <td>0.001286</td>\\n\",\n       \"      <td>0.001514</td>\\n\",\n       \"      <td>0.001955</td>\\n\",\n       \"      <td>0.000651</td>\\n\",\n       \"      <td>0.000014</td>\\n\",\n       \"      <td>0.001617</td>\\n\",\n       \"      <td>0.001571</td>\\n\",\n       \"      <td>-0.002623</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>19</th>\\n\",\n       \"      <td>physician</td>\\n\",\n       \"      <td>0.491203</td>\\n\",\n       \"      <td>0.019845</td>\\n\",\n       \"      <td>0.036850</td>\\n\",\n       \"      <td>0.056989</td>\\n\",\n       \"      <td>0.035120</td>\\n\",\n       \"      <td>0.042554</td>\\n\",\n       \"      <td>0.040719</td>\\n\",\n       \"      <td>0.034896</td>\\n\",\n       \"      <td>0.024797</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.001413</td>\\n\",\n       \"      <td>0.005790</td>\\n\",\n       \"      <td>0.006125</td>\\n\",\n       \"      <td>0.006385</td>\\n\",\n       \"      <td>0.006968</td>\\n\",\n       \"      <td>0.004761</td>\\n\",\n       \"      <td>0.007537</td>\\n\",\n       \"      <td>0.001844</td>\\n\",\n       \"      <td>0.004126</td>\\n\",\n       \"      <td>0.000307</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>20</th>\\n\",\n       \"      <td>poet</td>\\n\",\n       \"      <td>0.483051</td>\\n\",\n       \"      <td>-0.044163</td>\\n\",\n       \"      <td>0.009395</td>\\n\",\n       \"      <td>-0.007190</td>\\n\",\n       \"      <td>0.012207</td>\\n\",\n       \"      <td>0.006903</td>\\n\",\n       \"      <td>-0.006711</td>\\n\",\n       \"      <td>0.016393</td>\\n\",\n       \"      <td>0.001949</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000827</td>\\n\",\n       \"      <td>-0.000845</td>\\n\",\n       \"      <td>-0.001125</td>\\n\",\n       \"      <td>-0.000642</td>\\n\",\n       \"      <td>-0.000209</td>\\n\",\n       \"      <td>-0.000453</td>\\n\",\n       \"      <td>-0.000933</td>\\n\",\n       \"      <td>-0.000733</td>\\n\",\n       \"      <td>-0.000761</td>\\n\",\n       \"      <td>-0.000698</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>17</th>\\n\",\n       \"      <td>personal_trainer</td>\\n\",\n       \"      <td>0.468293</td>\\n\",\n       \"      <td>-0.080944</td>\\n\",\n   
    \"      <td>-0.011850</td>\\n\",\n       \"      <td>-0.068043</td>\\n\",\n       \"      <td>0.032397</td>\\n\",\n       \"      <td>-0.028670</td>\\n\",\n       \"      <td>-0.037557</td>\\n\",\n       \"      <td>-0.091361</td>\\n\",\n       \"      <td>-0.049694</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.000783</td>\\n\",\n       \"      <td>-0.000399</td>\\n\",\n       \"      <td>-0.001005</td>\\n\",\n       \"      <td>-0.000138</td>\\n\",\n       \"      <td>-0.000473</td>\\n\",\n       \"      <td>-0.000470</td>\\n\",\n       \"      <td>-0.000456</td>\\n\",\n       \"      <td>-0.000816</td>\\n\",\n       \"      <td>-0.000737</td>\\n\",\n       \"      <td>0.000032</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>21</th>\\n\",\n       \"      <td>professor</td>\\n\",\n       \"      <td>0.452428</td>\\n\",\n       \"      <td>-0.018119</td>\\n\",\n       \"      <td>0.011301</td>\\n\",\n       \"      <td>-0.011141</td>\\n\",\n       \"      <td>-0.015243</td>\\n\",\n       \"      <td>-0.012384</td>\\n\",\n       \"      <td>0.002382</td>\\n\",\n       \"      <td>-0.004640</td>\\n\",\n       \"      <td>-0.002251</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.001640</td>\\n\",\n       \"      <td>0.001259</td>\\n\",\n       \"      <td>0.001349</td>\\n\",\n       \"      <td>0.004071</td>\\n\",\n       \"      <td>-0.003034</td>\\n\",\n       \"      <td>-0.004298</td>\\n\",\n       \"      <td>-0.003673</td>\\n\",\n       \"      <td>-0.000717</td>\\n\",\n       \"      <td>-0.003564</td>\\n\",\n       \"      <td>-0.001995</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>14</th>\\n\",\n       \"      <td>painter</td>\\n\",\n       \"      <td>0.452361</td>\\n\",\n       \"      <td>0.003161</td>\\n\",\n       \"      <td>0.036012</td>\\n\",\n       \"      <td>0.017337</td>\\n\",\n       \"      <td>-0.035538</td>\\n\",\n       \"     
 <td>0.012959</td>\\n\",\n       \"      <td>0.006991</td>\\n\",\n       \"      <td>-0.001613</td>\\n\",\n       \"      <td>-0.002095</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.000951</td>\\n\",\n       \"      <td>-0.000336</td>\\n\",\n       \"      <td>-0.000125</td>\\n\",\n       \"      <td>-0.000197</td>\\n\",\n       \"      <td>0.000173</td>\\n\",\n       \"      <td>0.000315</td>\\n\",\n       \"      <td>-0.000022</td>\\n\",\n       \"      <td>-0.000223</td>\\n\",\n       \"      <td>0.000050</td>\\n\",\n       \"      <td>0.000144</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>accountant</td>\\n\",\n       \"      <td>0.374554</td>\\n\",\n       \"      <td>-0.055930</td>\\n\",\n       \"      <td>-0.031311</td>\\n\",\n       \"      <td>-0.043805</td>\\n\",\n       \"      <td>-0.025312</td>\\n\",\n       \"      <td>0.000459</td>\\n\",\n       \"      <td>-0.015143</td>\\n\",\n       \"      <td>-0.044432</td>\\n\",\n       \"      <td>-0.060287</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.001341</td>\\n\",\n       \"      <td>0.000090</td>\\n\",\n       \"      <td>0.000390</td>\\n\",\n       \"      <td>0.000694</td>\\n\",\n       \"      <td>0.000483</td>\\n\",\n       \"      <td>0.000683</td>\\n\",\n       \"      <td>0.000757</td>\\n\",\n       \"      <td>0.000344</td>\\n\",\n       \"      <td>0.000355</td>\\n\",\n       \"      <td>-0.000109</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>attorney</td>\\n\",\n       \"      <td>0.367104</td>\\n\",\n       \"      <td>-0.035824</td>\\n\",\n       \"      <td>-0.003903</td>\\n\",\n       \"      <td>-0.007270</td>\\n\",\n       \"      <td>0.007254</td>\\n\",\n       \"      <td>0.013928</td>\\n\",\n       \"      <td>0.004176</td>\\n\",\n       \"      <td>-0.010897</td>\\n\",\n       \"      
<td>-0.004719</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.003337</td>\\n\",\n       \"      <td>-0.000622</td>\\n\",\n       \"      <td>-0.001509</td>\\n\",\n       \"      <td>-0.001953</td>\\n\",\n       \"      <td>-0.001427</td>\\n\",\n       \"      <td>-0.001875</td>\\n\",\n       \"      <td>-0.002338</td>\\n\",\n       \"      <td>-0.002469</td>\\n\",\n       \"      <td>-0.001728</td>\\n\",\n       \"      <td>-0.000642</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>18</th>\\n\",\n       \"      <td>photographer</td>\\n\",\n       \"      <td>0.356927</td>\\n\",\n       \"      <td>-0.052775</td>\\n\",\n       \"      <td>-0.011488</td>\\n\",\n       \"      <td>-0.036094</td>\\n\",\n       \"      <td>-0.004054</td>\\n\",\n       \"      <td>-0.017355</td>\\n\",\n       \"      <td>-0.004763</td>\\n\",\n       \"      <td>-0.035910</td>\\n\",\n       \"      <td>-0.031379</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.001568</td>\\n\",\n       \"      <td>0.000615</td>\\n\",\n       \"      <td>-0.000121</td>\\n\",\n       \"      <td>-0.000051</td>\\n\",\n       \"      <td>-0.000025</td>\\n\",\n       \"      <td>0.001542</td>\\n\",\n       \"      <td>0.001537</td>\\n\",\n       \"      <td>-0.000092</td>\\n\",\n       \"      <td>0.000170</td>\\n\",\n       \"      <td>-0.000297</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"      <td>0.345824</td>\\n\",\n       \"      <td>0.009651</td>\\n\",\n       \"      <td>-0.040738</td>\\n\",\n       \"      <td>0.003124</td>\\n\",\n       \"      <td>0.013102</td>\\n\",\n       \"      <td>0.014166</td>\\n\",\n       \"      <td>0.008104</td>\\n\",\n       \"      <td>0.017242</td>\\n\",\n       \"      <td>0.015563</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000945</td>\\n\",\n       \"      
<td>0.000738</td>\\n\",\n       \"      <td>0.000532</td>\\n\",\n       \"      <td>0.000574</td>\\n\",\n       \"      <td>0.000563</td>\\n\",\n       \"      <td>0.000409</td>\\n\",\n       \"      <td>0.000801</td>\\n\",\n       \"      <td>0.000516</td>\\n\",\n       \"      <td>0.000359</td>\\n\",\n       \"      <td>0.000283</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>filmmaker</td>\\n\",\n       \"      <td>0.322148</td>\\n\",\n       \"      <td>-0.005893</td>\\n\",\n       \"      <td>-0.023485</td>\\n\",\n       \"      <td>-0.017356</td>\\n\",\n       \"      <td>0.038690</td>\\n\",\n       \"      <td>0.032797</td>\\n\",\n       \"      <td>0.018358</td>\\n\",\n       \"      <td>-0.019507</td>\\n\",\n       \"      <td>-0.001827</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000127</td>\\n\",\n       \"      <td>0.002068</td>\\n\",\n       \"      <td>0.001236</td>\\n\",\n       \"      <td>0.001535</td>\\n\",\n       \"      <td>0.001641</td>\\n\",\n       \"      <td>0.001847</td>\\n\",\n       \"      <td>0.002094</td>\\n\",\n       \"      <td>0.001204</td>\\n\",\n       \"      <td>0.000814</td>\\n\",\n       \"      <td>0.002261</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>chiropractor</td>\\n\",\n       \"      <td>0.298824</td>\\n\",\n       \"      <td>-0.025604</td>\\n\",\n       \"      <td>-0.004360</td>\\n\",\n       \"      <td>-0.073746</td>\\n\",\n       \"      <td>-0.023146</td>\\n\",\n       \"      <td>0.024071</td>\\n\",\n       \"      <td>-0.021350</td>\\n\",\n       \"      <td>-0.011547</td>\\n\",\n       \"      <td>-0.028457</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.000152</td>\\n\",\n       \"      <td>0.000921</td>\\n\",\n       \"      <td>0.000345</td>\\n\",\n       \"      <td>0.000175</td>\\n\",\n       \"      
<td>0.000617</td>\\n\",\n       \"      <td>0.000419</td>\\n\",\n       \"      <td>0.000127</td>\\n\",\n       \"      <td>0.000096</td>\\n\",\n       \"      <td>0.000309</td>\\n\",\n       \"      <td>0.000236</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>16</th>\\n\",\n       \"      <td>pastor</td>\\n\",\n       \"      <td>0.229282</td>\\n\",\n       \"      <td>-0.274172</td>\\n\",\n       \"      <td>-0.069785</td>\\n\",\n       \"      <td>-0.259533</td>\\n\",\n       \"      <td>-0.096731</td>\\n\",\n       \"      <td>-0.127909</td>\\n\",\n       \"      <td>-0.156583</td>\\n\",\n       \"      <td>-0.218206</td>\\n\",\n       \"      <td>-0.166127</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.000073</td>\\n\",\n       \"      <td>0.001051</td>\\n\",\n       \"      <td>0.000741</td>\\n\",\n       \"      <td>0.000602</td>\\n\",\n       \"      <td>0.000453</td>\\n\",\n       \"      <td>0.001137</td>\\n\",\n       \"      <td>0.001293</td>\\n\",\n       \"      <td>0.000333</td>\\n\",\n       \"      <td>0.000333</td>\\n\",\n       \"      <td>0.000199</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>architect</td>\\n\",\n       \"      <td>0.225399</td>\\n\",\n       \"      <td>0.053551</td>\\n\",\n       \"      <td>0.003069</td>\\n\",\n       \"      <td>0.003208</td>\\n\",\n       \"      <td>0.106769</td>\\n\",\n       \"      <td>0.110808</td>\\n\",\n       \"      <td>0.073486</td>\\n\",\n       \"      <td>-0.005593</td>\\n\",\n       \"      <td>0.049996</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>-0.001115</td>\\n\",\n       \"      <td>0.004353</td>\\n\",\n       \"      <td>0.001076</td>\\n\",\n       \"      <td>0.001065</td>\\n\",\n       \"      <td>0.002815</td>\\n\",\n       \"      <td>0.005941</td>\\n\",\n       \"      <td>0.002935</td>\\n\",\n       \"      <td>0.002423</td>\\n\",\n      
 \"      <td>0.003652</td>\\n\",\n       \"      <td>-0.001770</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>comedian</td>\\n\",\n       \"      <td>0.219457</td>\\n\",\n       \"      <td>-0.225967</td>\\n\",\n       \"      <td>-0.081757</td>\\n\",\n       \"      <td>-0.156671</td>\\n\",\n       \"      <td>-0.065501</td>\\n\",\n       \"      <td>-0.076109</td>\\n\",\n       \"      <td>-0.087733</td>\\n\",\n       \"      <td>-0.118004</td>\\n\",\n       \"      <td>-0.124757</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000234</td>\\n\",\n       \"      <td>0.000593</td>\\n\",\n       \"      <td>-0.000030</td>\\n\",\n       \"      <td>0.000366</td>\\n\",\n       \"      <td>-0.000133</td>\\n\",\n       \"      <td>0.000633</td>\\n\",\n       \"      <td>0.000753</td>\\n\",\n       \"      <td>0.000327</td>\\n\",\n       \"      <td>-0.000055</td>\\n\",\n       \"      <td>0.000581</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>24</th>\\n\",\n       \"      <td>software_engineer</td>\\n\",\n       \"      <td>0.157746</td>\\n\",\n       \"      <td>-0.065456</td>\\n\",\n       \"      <td>0.023591</td>\\n\",\n       \"      <td>-0.056956</td>\\n\",\n       \"      <td>-0.042324</td>\\n\",\n       \"      <td>-0.060300</td>\\n\",\n       \"      <td>-0.021202</td>\\n\",\n       \"      <td>0.015468</td>\\n\",\n       \"      <td>-0.036829</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.001209</td>\\n\",\n       \"      <td>0.005100</td>\\n\",\n       \"      <td>0.009260</td>\\n\",\n       \"      <td>0.007132</td>\\n\",\n       \"      <td>0.006266</td>\\n\",\n       \"      <td>0.003512</td>\\n\",\n       \"      <td>0.006474</td>\\n\",\n       \"      <td>0.004885</td>\\n\",\n       \"      <td>0.004344</td>\\n\",\n       \"      <td>-0.000074</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       
\"      <th>25</th>\\n\",\n       \"      <td>surgeon</td>\\n\",\n       \"      <td>0.153592</td>\\n\",\n       \"      <td>-0.229816</td>\\n\",\n       \"      <td>-0.051839</td>\\n\",\n       \"      <td>-0.245461</td>\\n\",\n       \"      <td>-0.122859</td>\\n\",\n       \"      <td>-0.127233</td>\\n\",\n       \"      <td>-0.089205</td>\\n\",\n       \"      <td>-0.220015</td>\\n\",\n       \"      <td>-0.207968</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.002435</td>\\n\",\n       \"      <td>0.005888</td>\\n\",\n       \"      <td>0.004638</td>\\n\",\n       \"      <td>0.002488</td>\\n\",\n       \"      <td>0.002320</td>\\n\",\n       \"      <td>0.004059</td>\\n\",\n       \"      <td>0.005013</td>\\n\",\n       \"      <td>0.003432</td>\\n\",\n       \"      <td>0.003316</td>\\n\",\n       \"      <td>0.000101</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>composer</td>\\n\",\n       \"      <td>0.153186</td>\\n\",\n       \"      <td>-0.068712</td>\\n\",\n       \"      <td>0.036272</td>\\n\",\n       \"      <td>-0.048370</td>\\n\",\n       \"      <td>-0.001737</td>\\n\",\n       \"      <td>-0.050061</td>\\n\",\n       \"      <td>-0.008452</td>\\n\",\n       \"      <td>-0.064452</td>\\n\",\n       \"      <td>-0.063849</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.001154</td>\\n\",\n       \"      <td>0.001802</td>\\n\",\n       \"      <td>0.001463</td>\\n\",\n       \"      <td>0.001543</td>\\n\",\n       \"      <td>0.001389</td>\\n\",\n       \"      <td>0.001676</td>\\n\",\n       \"      <td>0.001567</td>\\n\",\n       \"      <td>0.001011</td>\\n\",\n       \"      <td>0.001245</td>\\n\",\n       \"      <td>0.002464</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>dj</td>\\n\",\n       \"      <td>0.145299</td>\\n\",\n       \"      <td>-0.103824</td>\\n\",\n      
 \"      <td>0.099118</td>\\n\",\n       \"      <td>-0.145000</td>\\n\",\n       \"      <td>0.027647</td>\\n\",\n       \"      <td>-0.083824</td>\\n\",\n       \"      <td>0.000882</td>\\n\",\n       \"      <td>-0.178824</td>\\n\",\n       \"      <td>-0.040588</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000244</td>\\n\",\n       \"      <td>0.001133</td>\\n\",\n       \"      <td>0.000257</td>\\n\",\n       \"      <td>0.000211</td>\\n\",\n       \"      <td>0.000721</td>\\n\",\n       \"      <td>0.000564</td>\\n\",\n       \"      <td>0.000206</td>\\n\",\n       \"      <td>0.000826</td>\\n\",\n       \"      <td>0.000299</td>\\n\",\n       \"      <td>-0.000025</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>23</th>\\n\",\n       \"      <td>rapper</td>\\n\",\n       \"      <td>0.085859</td>\\n\",\n       \"      <td>-0.138772</td>\\n\",\n       \"      <td>0.047449</td>\\n\",\n       \"      <td>-0.096198</td>\\n\",\n       \"      <td>0.017225</td>\\n\",\n       \"      <td>0.030224</td>\\n\",\n       \"      <td>0.175496</td>\\n\",\n       \"      <td>0.019175</td>\\n\",\n       \"      <td>-0.012350</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.000577</td>\\n\",\n       \"      <td>0.000155</td>\\n\",\n       \"      <td>0.000631</td>\\n\",\n       \"      <td>0.000491</td>\\n\",\n       \"      <td>0.000268</td>\\n\",\n       \"      <td>0.000834</td>\\n\",\n       \"      <td>0.001200</td>\\n\",\n       \"      <td>0.000645</td>\\n\",\n       \"      <td>0.000180</td>\\n\",\n       \"      <td>-0.000048</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"<p>28 rows × 24 columns</p>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"     label_profession  frac_female  debiased_biosbias_tpr_gender_gap  \\\\\\n\",\n       \"7           dietitian     0.920437                          0.290927   \\n\",\n       
\"13              nurse     0.914502                          0.082735   \\n\",\n       \"15          paralegal     0.866109                          0.375755   \\n\",\n       \"27       yoga_teacher     0.858696                          0.276534   \\n\",\n       \"12              model     0.818988                          0.480652   \\n\",\n       \"10  interior_designer     0.782609                          0.182716   \\n\",\n       \"22       psychologist     0.620751                          0.000799   \\n\",\n       \"26            teacher     0.604382                          0.111221   \\n\",\n       \"11         journalist     0.492152                          0.019865   \\n\",\n       \"19          physician     0.491203                          0.019845   \\n\",\n       \"20               poet     0.483051                         -0.044163   \\n\",\n       \"17   personal_trainer     0.468293                         -0.080944   \\n\",\n       \"21          professor     0.452428                         -0.018119   \\n\",\n       \"14            painter     0.452361                          0.003161   \\n\",\n       \"0          accountant     0.374554                         -0.055930   \\n\",\n       \"2            attorney     0.367104                         -0.035824   \\n\",\n       \"18       photographer     0.356927                         -0.052775   \\n\",\n       \"6             dentist     0.345824                          0.009651   \\n\",\n       \"9           filmmaker     0.322148                         -0.005893   \\n\",\n       \"3        chiropractor     0.298824                         -0.025604   \\n\",\n       \"16             pastor     0.229282                         -0.274172   \\n\",\n       \"1           architect     0.225399                          0.053551   \\n\",\n       \"4            comedian     0.219457                         -0.225967   \\n\",\n       \"24  software_engineer     0.157746                         
-0.065456   \\n\",\n       \"25            surgeon     0.153592                         -0.229816   \\n\",\n       \"5            composer     0.153186                         -0.068712   \\n\",\n       \"8                  dj     0.145299                         -0.103824   \\n\",\n       \"23             rapper     0.085859                         -0.138772   \\n\",\n       \"\\n\",\n       \"    very_scrubbed_tpr_gender_gap  debiased_tolga_tpr_gender_gap  \\\\\\n\",\n       \"7                       0.173878                       0.297707   \\n\",\n       \"13                      0.013742                       0.085377   \\n\",\n       \"15                      0.094656                       0.317482   \\n\",\n       \"27                      0.005518                       0.143784   \\n\",\n       \"12                      0.176120                       0.544309   \\n\",\n       \"10                     -0.013580                       0.243210   \\n\",\n       \"22                      0.008890                       0.045876   \\n\",\n       \"26                      0.025352                       0.129299   \\n\",\n       \"11                      0.010182                       0.057554   \\n\",\n       \"19                      0.036850                       0.056989   \\n\",\n       \"20                      0.009395                      -0.007190   \\n\",\n       \"17                     -0.011850                      -0.068043   \\n\",\n       \"21                      0.011301                      -0.011141   \\n\",\n       \"14                      0.036012                       0.017337   \\n\",\n       \"0                      -0.031311                      -0.043805   \\n\",\n       \"2                      -0.003903                      -0.007270   \\n\",\n       \"18                     -0.011488                      -0.036094   \\n\",\n       \"6                      -0.040738                       0.003124   \\n\",\n       \"9                     
 -0.023485                      -0.017356   \\n\",\n       \"3                      -0.004360                      -0.073746   \\n\",\n       \"16                     -0.069785                      -0.259533   \\n\",\n       \"1                       0.003069                       0.003208   \\n\",\n       \"4                      -0.081757                      -0.156671   \\n\",\n       \"24                      0.023591                      -0.056956   \\n\",\n       \"25                     -0.051839                      -0.245461   \\n\",\n       \"5                       0.036272                      -0.048370   \\n\",\n       \"8                       0.099118                      -0.145000   \\n\",\n       \"23                      0.047449                      -0.096198   \\n\",\n       \"\\n\",\n       \"    strong_debiased_1_tpr_gender_gap  strong_no_projection_tpr_gender_gap  \\\\\\n\",\n       \"7                           0.199900                             0.223862   \\n\",\n       \"13                          0.048740                             0.033271   \\n\",\n       \"15                          0.262077                             0.256944   \\n\",\n       \"27                          0.208049                             0.116196   \\n\",\n       \"12                          0.418456                             0.460211   \\n\",\n       \"10                          0.081481                             0.096296   \\n\",\n       \"22                          0.043524                             0.045169   \\n\",\n       \"26                          0.111760                             0.113756   \\n\",\n       \"11                          0.021920                             0.001790   \\n\",\n       \"19                          0.035120                             0.042554   \\n\",\n       \"20                          0.012207                             0.006903   \\n\",\n       \"17                          0.032397                    
        -0.028670   \\n\",\n       \"21                         -0.015243                            -0.012384   \\n\",\n       \"14                         -0.035538                             0.012959   \\n\",\n       \"0                          -0.025312                             0.000459   \\n\",\n       \"2                           0.007254                             0.013928   \\n\",\n       \"18                         -0.004054                            -0.017355   \\n\",\n       \"6                           0.013102                             0.014166   \\n\",\n       \"9                           0.038690                             0.032797   \\n\",\n       \"3                          -0.023146                             0.024071   \\n\",\n       \"16                         -0.096731                            -0.127909   \\n\",\n       \"1                           0.106769                             0.110808   \\n\",\n       \"4                          -0.065501                            -0.076109   \\n\",\n       \"24                         -0.042324                            -0.060300   \\n\",\n       \"25                         -0.122859                            -0.127233   \\n\",\n       \"5                          -0.001737                            -0.050061   \\n\",\n       \"8                           0.027647                            -0.083824   \\n\",\n       \"23                          0.017225                             0.030224   \\n\",\n       \"\\n\",\n       \"    strong_debiased_2_tpr_gender_gap  strong_no_equalize_tpr_gender_gap  \\\\\\n\",\n       \"7                           0.187072                           0.250980   \\n\",\n       \"13                          0.025981                           0.057404   \\n\",\n       \"15                          0.271437                           0.314915   \\n\",\n       \"27                          0.195067                           0.161636   \\n\",\n       
\"12                          0.455824                           0.532551   \\n\",\n       \"10                          0.041975                           0.224691   \\n\",\n       \"22                          0.020219                           0.042056   \\n\",\n       \"26                          0.114246                           0.119168   \\n\",\n       \"11                          0.013070                           0.042923   \\n\",\n       \"19                          0.040719                           0.034896   \\n\",\n       \"20                         -0.006711                           0.016393   \\n\",\n       \"17                         -0.037557                          -0.091361   \\n\",\n       \"21                          0.002382                          -0.004640   \\n\",\n       \"14                          0.006991                          -0.001613   \\n\",\n       \"0                          -0.015143                          -0.044432   \\n\",\n       \"2                           0.004176                          -0.010897   \\n\",\n       \"18                         -0.004763                          -0.035910   \\n\",\n       \"6                           0.008104                           0.017242   \\n\",\n       \"9                           0.018358                          -0.019507   \\n\",\n       \"3                          -0.021350                          -0.011547   \\n\",\n       \"16                         -0.156583                          -0.218206   \\n\",\n       \"1                           0.073486                          -0.005593   \\n\",\n       \"4                          -0.087733                          -0.118004   \\n\",\n       \"24                         -0.021202                           0.015468   \\n\",\n       \"25                         -0.089205                          -0.220015   \\n\",\n       \"5                          -0.008452                          -0.064452   \\n\",\n     
  \"8                           0.000882                          -0.178824   \\n\",\n       \"23                          0.175496                           0.019175   \\n\",\n       \"\\n\",\n       \"    glove_tpr_gender_gap           ...             \\\\\\n\",\n       \"7               0.232835           ...              \\n\",\n       \"13              0.082411           ...              \\n\",\n       \"15              0.271437           ...              \\n\",\n       \"27              0.208374           ...              \\n\",\n       \"12              0.505093           ...              \\n\",\n       \"10              0.270370           ...              \\n\",\n       \"22              0.017593           ...              \\n\",\n       \"26              0.137121           ...              \\n\",\n       \"11              0.058686           ...              \\n\",\n       \"19              0.024797           ...              \\n\",\n       \"20              0.001949           ...              \\n\",\n       \"17             -0.049694           ...              \\n\",\n       \"21             -0.002251           ...              \\n\",\n       \"14             -0.002095           ...              \\n\",\n       \"0              -0.060287           ...              \\n\",\n       \"2              -0.004719           ...              \\n\",\n       \"18             -0.031379           ...              \\n\",\n       \"6               0.015563           ...              \\n\",\n       \"9              -0.001827           ...              \\n\",\n       \"3              -0.028457           ...              \\n\",\n       \"16             -0.166127           ...              \\n\",\n       \"1               0.049996           ...              \\n\",\n       \"4              -0.124757           ...              \\n\",\n       \"24             -0.036829           ...              \\n\",\n       \"25             -0.207968           ...              
\\n\",\n       \"5              -0.063849           ...              \\n\",\n       \"8              -0.040588           ...              \\n\",\n       \"23             -0.012350           ...              \\n\",\n       \"\\n\",\n       \"    very_scrubbed_tnr_gender_gap  debiased_tolga_tnr_gender_gap  \\\\\\n\",\n       \"7                      -0.000674                      -0.002692   \\n\",\n       \"13                     -0.001686                      -0.007627   \\n\",\n       \"15                      0.000247                      -0.000103   \\n\",\n       \"27                      0.000535                      -0.001455   \\n\",\n       \"12                     -0.001022                      -0.000566   \\n\",\n       \"10                      0.000226                      -0.000884   \\n\",\n       \"22                     -0.000742                      -0.005913   \\n\",\n       \"26                     -0.000813                      -0.004694   \\n\",\n       \"11                      0.000762                      -0.000167   \\n\",\n       \"19                      0.001413                       0.005790   \\n\",\n       \"20                      0.000827                      -0.000845   \\n\",\n       \"17                     -0.000783                      -0.000399   \\n\",\n       \"21                     -0.001640                       0.001259   \\n\",\n       \"14                     -0.000951                      -0.000336   \\n\",\n       \"0                       0.001341                       0.000090   \\n\",\n       \"2                       0.003337                      -0.000622   \\n\",\n       \"18                      0.001568                       0.000615   \\n\",\n       \"6                       0.000945                       0.000738   \\n\",\n       \"9                       0.000127                       0.002068   \\n\",\n       \"3                      -0.000152                       0.000921   \\n\",\n       \"16           
          -0.000073                       0.001051   \\n\",\n       \"1                      -0.001115                       0.004353   \\n\",\n       \"4                       0.000234                       0.000593   \\n\",\n       \"24                      0.001209                       0.005100   \\n\",\n       \"25                      0.002435                       0.005888   \\n\",\n       \"5                       0.001154                       0.001802   \\n\",\n       \"8                       0.000244                       0.001133   \\n\",\n       \"23                      0.000577                       0.000155   \\n\",\n       \"\\n\",\n       \"    strong_debiased_1_tnr_gender_gap  strong_no_projection_tnr_gender_gap  \\\\\\n\",\n       \"7                          -0.001220                            -0.001617   \\n\",\n       \"13                         -0.007427                            -0.004478   \\n\",\n       \"15                          0.000095                            -0.000012   \\n\",\n       \"27                         -0.001289                            -0.000758   \\n\",\n       \"12                          0.000379                             0.000429   \\n\",\n       \"10                          0.000032                            -0.000023   \\n\",\n       \"22                         -0.004535                            -0.002672   \\n\",\n       \"26                         -0.002497                            -0.004570   \\n\",\n       \"11                          0.001286                             0.001514   \\n\",\n       \"19                          0.006125                             0.006385   \\n\",\n       \"20                         -0.001125                            -0.000642   \\n\",\n       \"17                         -0.001005                            -0.000138   \\n\",\n       \"21                          0.001349                             0.004071   \\n\",\n       \"14                         
-0.000125                            -0.000197   \\n\",\n       \"0                           0.000390                             0.000694   \\n\",\n       \"2                          -0.001509                            -0.001953   \\n\",\n       \"18                         -0.000121                            -0.000051   \\n\",\n       \"6                           0.000532                             0.000574   \\n\",\n       \"9                           0.001236                             0.001535   \\n\",\n       \"3                           0.000345                             0.000175   \\n\",\n       \"16                          0.000741                             0.000602   \\n\",\n       \"1                           0.001076                             0.001065   \\n\",\n       \"4                          -0.000030                             0.000366   \\n\",\n       \"24                          0.009260                             0.007132   \\n\",\n       \"25                          0.004638                             0.002488   \\n\",\n       \"5                           0.001463                             0.001543   \\n\",\n       \"8                           0.000257                             0.000211   \\n\",\n       \"23                          0.000631                             0.000491   \\n\",\n       \"\\n\",\n       \"    strong_debiased_2_tnr_gender_gap  strong_no_equalize_tnr_gender_gap  \\\\\\n\",\n       \"7                          -0.001530                          -0.002213   \\n\",\n       \"13                         -0.004807                          -0.004455   \\n\",\n       \"15                          0.000075                          -0.000219   \\n\",\n       \"27                         -0.001393                          -0.001211   \\n\",\n       \"12                         -0.000039                          -0.000513   \\n\",\n       \"10                          0.000024                          
-0.000676   \\n\",\n       \"22                         -0.002096                          -0.004275   \\n\",\n       \"26                         -0.001141                          -0.002609   \\n\",\n       \"11                          0.001955                           0.000651   \\n\",\n       \"19                          0.006968                           0.004761   \\n\",\n       \"20                         -0.000209                          -0.000453   \\n\",\n       \"17                         -0.000473                          -0.000470   \\n\",\n       \"21                         -0.003034                          -0.004298   \\n\",\n       \"14                          0.000173                           0.000315   \\n\",\n       \"0                           0.000483                           0.000683   \\n\",\n       \"2                          -0.001427                          -0.001875   \\n\",\n       \"18                         -0.000025                           0.001542   \\n\",\n       \"6                           0.000563                           0.000409   \\n\",\n       \"9                           0.001641                           0.001847   \\n\",\n       \"3                           0.000617                           0.000419   \\n\",\n       \"16                          0.000453                           0.001137   \\n\",\n       \"1                           0.002815                           0.005941   \\n\",\n       \"4                          -0.000133                           0.000633   \\n\",\n       \"24                          0.006266                           0.003512   \\n\",\n       \"25                          0.002320                           0.004059   \\n\",\n       \"5                           0.001389                           0.001676   \\n\",\n       \"8                           0.000721                           0.000564   \\n\",\n       \"23                          0.000268                        
   0.000834   \\n\",\n       \"\\n\",\n       \"    glove_tnr_gender_gap  strong_debiased_4_tnr_gender_gap  \\\\\\n\",\n       \"7              -0.002810                         -0.001630   \\n\",\n       \"13             -0.005866                         -0.002840   \\n\",\n       \"15             -0.000164                         -0.000060   \\n\",\n       \"27             -0.001211                         -0.001081   \\n\",\n       \"12             -0.001008                          0.000249   \\n\",\n       \"10             -0.000201                          0.000216   \\n\",\n       \"22             -0.002278                         -0.003760   \\n\",\n       \"26             -0.002664                         -0.002461   \\n\",\n       \"11              0.000014                          0.001617   \\n\",\n       \"19              0.007537                          0.001844   \\n\",\n       \"20             -0.000933                         -0.000733   \\n\",\n       \"17             -0.000456                         -0.000816   \\n\",\n       \"21             -0.003673                         -0.000717   \\n\",\n       \"14             -0.000022                         -0.000223   \\n\",\n       \"0               0.000757                          0.000344   \\n\",\n       \"2              -0.002338                         -0.002469   \\n\",\n       \"18              0.001537                         -0.000092   \\n\",\n       \"6               0.000801                          0.000516   \\n\",\n       \"9               0.002094                          0.001204   \\n\",\n       \"3               0.000127                          0.000096   \\n\",\n       \"16              0.001293                          0.000333   \\n\",\n       \"1               0.002935                          0.002423   \\n\",\n       \"4               0.000753                          0.000327   \\n\",\n       \"24              0.006474                          0.004885   \\n\",\n       
\"25              0.005013                          0.003432   \\n\",\n       \"5               0.001567                          0.001011   \\n\",\n       \"8               0.000206                          0.000826   \\n\",\n       \"23              0.001200                          0.000645   \\n\",\n       \"\\n\",\n       \"    strong_debiased_3_tnr_gender_gap  scrubbed_tnr_gender_gap  \\n\",\n       \"7                          -0.001412                -0.000266  \\n\",\n       \"13                         -0.002707                -0.001573  \\n\",\n       \"15                         -0.000236                 0.000195  \\n\",\n       \"27                         -0.001399                 0.000299  \\n\",\n       \"12                          0.000181                 0.001161  \\n\",\n       \"10                         -0.000013                 0.000407  \\n\",\n       \"22                         -0.002820                -0.001450  \\n\",\n       \"26                         -0.001785                 0.000671  \\n\",\n       \"11                          0.001571                -0.002623  \\n\",\n       \"19                          0.004126                 0.000307  \\n\",\n       \"20                         -0.000761                -0.000698  \\n\",\n       \"17                         -0.000737                 0.000032  \\n\",\n       \"21                         -0.003564                -0.001995  \\n\",\n       \"14                          0.000050                 0.000144  \\n\",\n       \"0                           0.000355                -0.000109  \\n\",\n       \"2                          -0.001728                -0.000642  \\n\",\n       \"18                          0.000170                -0.000297  \\n\",\n       \"6                           0.000359                 0.000283  \\n\",\n       \"9                           0.000814                 0.002261  \\n\",\n       \"3                           0.000309                 0.000236  
\\n\",\n       \"16                          0.000333                 0.000199  \\n\",\n       \"1                           0.003652                -0.001770  \\n\",\n       \"4                          -0.000055                 0.000581  \\n\",\n       \"24                          0.004344                -0.000074  \\n\",\n       \"25                          0.003316                 0.000101  \\n\",\n       \"5                           0.001245                 0.002464  \\n\",\n       \"8                           0.000299                -0.000025  \\n\",\n       \"23                          0.000180                -0.000048  \\n\",\n       \"\\n\",\n       \"[28 rows x 24 columns]\"\n      ]\n     },\n     \"execution_count\": 30,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"gender_gap_df.sort_values('frac_female', ascending = False)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 31,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Fraction of comments where new model has lower\\n\",\n    \"# TPR gap than the baseline\\n\",\n    \"\\n\",\n    \"def compute_fraction_improved(df, baseline_model, improved_model):\\n\",\n    \"    is_improved = np.abs(df[baseline_model]) >= np.abs(df[improved_model])\\n\",\n    \"    return np.mean(is_improved)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 32,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"debiased_biosbias\\n\",\n      \"0.32142857142857145\\n\",\n      \"very_scrubbed\\n\",\n      \"0.7142857142857143\\n\",\n      \"debiased_tolga\\n\",\n      \"0.2857142857142857\\n\",\n      \"strong_debiased_1\\n\",\n      \"0.6428571428571429\\n\",\n      \"strong_no_projection\\n\",\n      \"0.6071428571428571\\n\",\n      \"strong_debiased_2\\n\",\n      \"0.7142857142857143\\n\",\n      
\"strong_no_equalize\\n\",\n      \"0.39285714285714285\\n\",\n      \"glove\\n\",\n      \"1.0\\n\",\n      \"strong_debiased_4\\n\",\n      \"0.6071428571428571\\n\",\n      \"strong_debiased_3\\n\",\n      \"0.6071428571428571\\n\",\n      \"scrubbed\\n\",\n      \"0.8571428571428571\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"for _model in MODEL_NAMES.values():\\n\",\n    \"    print(_model)\\n\",\n    \"    print(compute_fraction_improved(gender_gap_df, 'glove_tpr_gender_gap', '{}_tpr_gender_gap'.format(_model)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 33,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tpr_cols = ['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\\n\",\n    \"tnr_cols = ['{}_tnr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]\\n\",\n    \"gender_gap_cols = tpr_cols + tnr_cols\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 34,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"debiased_biosbias_tpr_gender_gap       0.029446\\n\",\n       \"very_scrubbed_tpr_gender_gap           0.003786\\n\",\n       \"debiased_tolga_tpr_gender_gap          0.028584\\n\",\n       \"strong_debiased_1_tpr_gender_gap       0.014313\\n\",\n       \"strong_no_projection_tpr_gender_gap    0.015602\\n\",\n       \"strong_debiased_2_tpr_gender_gap       0.016134\\n\",\n       \"strong_no_equalize_tpr_gender_gap      0.025152\\n\",\n       \"glove_tpr_gender_gap                   0.022636\\n\",\n       \"strong_debiased_4_tpr_gender_gap       0.016461\\n\",\n       \"strong_debiased_3_tpr_gender_gap       0.014632\\n\",\n       \"scrubbed_tpr_gender_gap                0.000189\\n\",\n       \"debiased_biosbias_tnr_gender_gap       0.000011\\n\",\n       \"very_scrubbed_tnr_gender_gap           0.000001\\n\",\n       \"debiased_tolga_tnr_gender_gap          0.000009\\n\",\n       
\"strong_debiased_1_tnr_gender_gap       0.000009\\n\",\n       \"strong_no_projection_tnr_gender_gap    0.000006\\n\",\n       \"strong_debiased_2_tnr_gender_gap       0.000006\\n\",\n       \"strong_no_equalize_tnr_gender_gap      0.000006\\n\",\n       \"glove_tnr_gender_gap                   0.000008\\n\",\n       \"strong_debiased_4_tnr_gender_gap       0.000003\\n\",\n       \"strong_debiased_3_tnr_gender_gap       0.000004\\n\",\n       \"scrubbed_tnr_gender_gap                0.000001\\n\",\n       \"dtype: float64\"\n      ]\n     },\n     \"execution_count\": 34,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"gender_gap_df[gender_gap_cols].apply(lambda x: np.mean(x**2))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 35,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"debiased_biosbias_tpr_gender_gap       0.119049\\n\",\n       \"very_scrubbed_tpr_gender_gap           0.041268\\n\",\n       \"debiased_tolga_tpr_gender_gap          0.114932\\n\",\n       \"strong_debiased_1_tpr_gender_gap       0.075670\\n\",\n       \"strong_no_projection_tpr_gender_gap    0.079293\\n\",\n       \"strong_debiased_2_tpr_gender_gap       0.075149\\n\",\n       \"strong_no_equalize_tpr_gender_gap      0.102661\\n\",\n       \"glove_tpr_gender_gap                   0.096764\\n\",\n       \"strong_debiased_4_tpr_gender_gap       0.083171\\n\",\n       \"strong_debiased_3_tpr_gender_gap       0.070882\\n\",\n       \"scrubbed_tpr_gender_gap                0.007773\\n\",\n       \"debiased_biosbias_tnr_gender_gap       0.002204\\n\",\n       \"very_scrubbed_tnr_gender_gap           0.000958\\n\",\n       \"debiased_tolga_tnr_gender_gap          0.002066\\n\",\n       \"strong_debiased_1_tnr_gender_gap       0.001811\\n\",\n       \"strong_no_projection_tnr_gender_gap    0.001657\\n\",\n       \"strong_debiased_2_tnr_gender_gap       
0.001537\\n\",\n       \"strong_no_equalize_tnr_gender_gap      0.001866\\n\",\n       \"glove_tnr_gender_gap                   0.001997\\n\",\n       \"strong_debiased_4_tnr_gender_gap       0.001316\\n\",\n       \"strong_debiased_3_tnr_gender_gap       0.001376\\n\",\n       \"scrubbed_tnr_gender_gap                0.000747\\n\",\n       \"dtype: float64\"\n      ]\n     },\n     \"execution_count\": 35,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"gender_gap_df[gender_gap_cols].apply(lambda x: np.mean(np.abs(x)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 36,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def plot_tpr_gap(df, _model):\\n\",\n    \"    fig, ax = plt.subplots(figsize=(15, 6))\\n\",\n    \"    x = 'frac_female'\\n\",\n    \"    y = '{}_tpr_gender_gap'.format(_model)\\n\",\n    \"    p1 = sns.regplot(x = x, y = y, data = df)\\n\",\n    \"    p1.set(xlabel = \\\"% Female\\\", ylabel = \\\"TPR Gender Gap\\\", title = _model)\\n\",\n    \"\\n\",\n    \"    for line in range(0,df.shape[0]):\\n\",\n    \"         p1.text(results_df[x][line]+0.01, df[y][line], df['label_profession'][line], horizontalalignment='left', size='medium', color='black')\\n\",\n    \"    plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 37,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"for _model in MODEL_NAMES.values():\\n\",\n    \"    if 'untuned' in _model:\\n\",\n    \"        plot_tpr_gap(results_df, _model)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 38,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th 
{\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>frac_female</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>frac_female</th>\\n\",\n       \"      <td>1.000000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>debiased_biosbias_tpr_gender_gap</th>\\n\",\n       \"      <td>0.829982</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>very_scrubbed_tpr_gender_gap</th>\\n\",\n       \"      <td>0.458378</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>debiased_tolga_tpr_gender_gap</th>\\n\",\n       \"      <td>0.824882</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_1_tpr_gender_gap</th>\\n\",\n       \"      <td>0.716922</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_no_projection_tpr_gender_gap</th>\\n\",\n       \"      <td>0.709000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_2_tpr_gender_gap</th>\\n\",\n       \"      <td>0.596896</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_no_equalize_tpr_gender_gap</th>\\n\",\n       \"      <td>0.772645</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>glove_tpr_gender_gap</th>\\n\",\n       \"      <td>0.794059</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_4_tpr_gender_gap</th>\\n\",\n       \"      
<td>0.550435</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>strong_debiased_3_tpr_gender_gap</th>\\n\",\n       \"      <td>0.707174</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>scrubbed_tpr_gender_gap</th>\\n\",\n       \"      <td>-0.282919</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"                                     frac_female\\n\",\n       \"frac_female                             1.000000\\n\",\n       \"debiased_biosbias_tpr_gender_gap        0.829982\\n\",\n       \"very_scrubbed_tpr_gender_gap            0.458378\\n\",\n       \"debiased_tolga_tpr_gender_gap           0.824882\\n\",\n       \"strong_debiased_1_tpr_gender_gap        0.716922\\n\",\n       \"strong_no_projection_tpr_gender_gap     0.709000\\n\",\n       \"strong_debiased_2_tpr_gender_gap        0.596896\\n\",\n       \"strong_no_equalize_tpr_gender_gap       0.772645\\n\",\n       \"glove_tpr_gender_gap                    0.794059\\n\",\n       \"strong_debiased_4_tpr_gender_gap        0.550435\\n\",\n       \"strong_debiased_3_tpr_gender_gap        0.707174\\n\",\n       \"scrubbed_tpr_gender_gap                -0.282919\"\n      ]\n     },\n     \"execution_count\": 38,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"results_df[['frac_female']+['{}_tpr_gender_gap'.format(_model) for _model in MODEL_NAMES.values()]].corr()[['frac_female']]\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Gender Prediction Analysis\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 39,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Which model does this correspond to?\\n\",\n    \"model_name = 'tf_gru_attention_multiclass_gender_biosbias_glove:v_20190405_142640'\\n\",\n    
\"gender_df['correct'] = ((gender_df['gender'] == 'M') == gender_df[model_name])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 40,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Accuracy: 0.8423\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"acc = gender_df.correct.sum()/gender_df.correct.count()\\n\",\n    \"print('Accuracy: {:.4f}'.format(acc))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"models_eval_py2\",\n   \"language\": \"python\",\n   \"name\": \"models_eval_py2\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 2\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython2\",\n   \"version\": \"2.7.10\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "model_evaluation/Predict bias.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 54,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import tensorflow as tf\\n\",\n    \"from tensorflow.contrib.framework.python.framework import checkpoint_utils\\n\",\n    \"\\n\",\n    \"from sklearn.metrics.pairwise import cosine_similarity\\n\",\n    \"from sklearn.preprocessing import normalize\\n\",\n    \"import numpy as np\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 20,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_DIR_OCCUPATION = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103117/model_dir/model.ckpt-100000'\\n\",\n    \"MODEL_DIR_GENDER = 'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_warmstart_biosbias_glove/20190404_151521/model_dir/model.ckpt-191000'\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Extract two matrices.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 23,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"('dense/bias', [1])\\n\",\n      \"('dense/bias/Adam', [1])\\n\",\n      \"('dense/bias/Adam_1', [1])\\n\",\n      \"('dense/kernel', [256, 1])\\n\",\n      \"('dense/kernel/Adam', [256, 1])\\n\",\n      \"('dense/kernel/Adam_1', [256, 1])\\n\",\n      \"('dense_1/bias', [128])\\n\",\n      \"('dense_1/bias/Adam', [128])\\n\",\n      \"('dense_1/bias/Adam_1', [128])\\n\",\n      \"('dense_1/kernel', [256, 128])\\n\",\n      \"('dense_1/kernel/Adam', [256, 128])\\n\",\n      \"('dense_1/kernel/Adam_1', [256, 128])\\n\",\n      \"('dense_2/bias', [33])\\n\",\n      \"('dense_2/bias/Adam', [33])\\n\",\n      \"('dense_2/bias/Adam_1', [33])\\n\",\n      \"('dense_2/kernel', [128, 33])\\n\",\n      
\"('dense_2/kernel/Adam', [128, 33])\\n\",\n      \"('dense_2/kernel/Adam_1', [128, 33])\\n\",\n      \"('embeddings', [400002, 100])\\n\",\n      \"('global_step', [])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam', [256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias/Adam_1', [256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam', [356, 256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel/Adam_1', [356, 256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam', [512])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias/Adam_1', [512])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam', [356, 512])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel/Adam_1', [356, 512])\\n\",\n      \"('signal_early_stopping/STOP', [])\\n\",\n      \"('title/beta1_power', [])\\n\",\n      \"('title/beta2_power', [])\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"var_list = checkpoint_utils.list_variables(MODEL_DIR_OCCUPATION)\\n\",\n    \"for v in var_list:\\n\",\n    \"    print(v)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 99,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"kernel_occupation = np.transpose(checkpoint_utils.load_variable(MODEL_DIR_OCCUPATION, 'dense_2/kernel'))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 100,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"('beta1_power', [])\\n\",\n      \"('beta2_power', [])\\n\",\n      
\"('dense/bias', [1])\\n\",\n      \"('dense/kernel', [256, 1])\\n\",\n      \"('dense_1/bias', [128])\\n\",\n      \"('dense_1/kernel', [256, 128])\\n\",\n      \"('embeddings', [400002, 100])\\n\",\n      \"('final_layer/bias', [33])\\n\",\n      \"('final_layer/bias/Adam', [33])\\n\",\n      \"('final_layer/bias/Adam_1', [33])\\n\",\n      \"('final_layer/kernel', [128, 33])\\n\",\n      \"('final_layer/kernel/Adam', [128, 33])\\n\",\n      \"('final_layer/kernel/Adam_1', [128, 33])\\n\",\n      \"('global_step', [])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/bias', [256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/candidate/kernel', [356, 256])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/bias', [512])\\n\",\n      \"('rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel', [356, 512])\\n\",\n      \"('signal_early_stopping/STOP', [])\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"var_list = checkpoint_utils.list_variables(MODEL_DIR_GENDER)\\n\",\n    \"for v in var_list:\\n\",\n    \"    print(v)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 101,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"kernel_gender = np.transpose(checkpoint_utils.load_variable(MODEL_DIR_GENDER, 'final_layer/kernel'))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Compute cosine.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 102,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"TITLE_LABELS = [\\n\",\n    \"    'accountant', 'acupuncturist', 'architect', 'attorney', 'chiropractor', 'comedian', 'composer', 'dentist',\\n\",\n    \"    'dietitian', 'dj', 'filmmaker', 'interior_designer', 'journalist', 'landscape_architect', 'magician',\\n\",\n    \"    'massage_therapist', 'model', 'nurse', 'painter', 'paralegal', 'pastor', 'personal_trainer',\\n\",\n    \"    'photographer', 'physician', 'poet', 
'professor', 'psychologist', 'rapper',\\n\",\n    \"    'real_estate_broker', 'software_engineer', 'surgeon', 'teacher', 'yoga_teacher']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 103,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"(33, 128)\"\n      ]\n     },\n     \"execution_count\": 103,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"kernel_gender.shape\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 104,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"kernel_gender_female = normalize(kernel_gender[0].reshape(1, -1))\\n\",\n    \"kernel_gender_male = normalize(kernel_gender[1].reshape(1, -1))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 105,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"array([[ 0.01618018,  0.1003583 , -0.0723118 ,  0.06453013,  0.22758739,\\n\",\n       \"         0.06790616,  0.08027411,  0.10015733, -0.05590729,  0.023418  ,\\n\",\n       \"         0.06255525, -0.02604564,  0.09049062, -0.01601316,  0.08945937,\\n\",\n       \"        -0.11582728,  0.06244883,  0.07855629,  0.01956639, -0.06774757,\\n\",\n       \"         0.00614625, -0.03594974,  0.0652191 , -0.05078628, -0.00807877,\\n\",\n       \"         0.06896302,  0.11013658, -0.04664179,  0.11593511,  0.17774113,\\n\",\n       \"         0.09496382,  0.12176205,  0.04098931, -0.0970282 ,  0.02898299,\\n\",\n       \"         0.10654851, -0.13562816,  0.03486229,  0.12194955,  0.02276845,\\n\",\n       \"         0.04589143, -0.06606348, -0.00129113, -0.07973252, -0.02630814,\\n\",\n       \"        -0.09769032, -0.1640446 , -0.07602697,  0.00429134,  0.06098389,\\n\",\n       \"         0.02934178, -0.07209212, -0.11304612,  0.29547158, -0.04287611,\\n\",\n       \"        -0.04518875, -0.02993831,  0.06304532,  
0.07989506, -0.09601919,\\n\",\n       \"         0.20816126, -0.1977993 ,  0.15119584,  0.01456547,  0.06435941,\\n\",\n       \"        -0.07794361, -0.00554093,  0.05497926,  0.0931736 ,  0.22706528,\\n\",\n       \"        -0.08019326, -0.0819607 ,  0.04490028, -0.01723337,  0.04124108,\\n\",\n       \"         0.13199665, -0.01417105,  0.0725795 , -0.05172402, -0.13563272,\\n\",\n       \"        -0.07302421,  0.24843292,  0.14667384, -0.02692026,  0.15892392,\\n\",\n       \"         0.02655477, -0.00804625,  0.00184608,  0.02203059,  0.00078905,\\n\",\n       \"         0.0115315 ,  0.00199543,  0.05942026,  0.07089076, -0.04697848,\\n\",\n       \"        -0.01500242, -0.02432874, -0.02453819, -0.13443194, -0.00370577,\\n\",\n       \"        -0.03219581, -0.07874984, -0.05446392,  0.05492223, -0.11461313,\\n\",\n       \"        -0.00379655,  0.01339969, -0.01030909,  0.0601744 ,  0.00417376,\\n\",\n       \"        -0.02308951, -0.1329045 , -0.00130105,  0.0959954 ,  0.03397062,\\n\",\n       \"         0.11269465,  0.00561908,  0.00870924,  0.0339431 ,  0.01517005,\\n\",\n       \"        -0.05439634, -0.02544309, -0.13284749,  0.04113958,  0.03033615,\\n\",\n       \"        -0.08890872, -0.09986325, -0.09274729]], dtype=float32)\"\n      ]\n     },\n     \"execution_count\": 105,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"kernel_gender_male\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 106,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"kernel_gender_mean = normalize((kernel_gender_female + kernel_gender_male) / 2)\\n\",\n    \"direction_male = kernel_gender_male - np.sum(np.multiply(kernel_gender_male, kernel_gender_mean))* kernel_gender_mean\\n\",\n    \"direction_female = kernel_gender_female - np.sum(np.multiply(kernel_gender_female, kernel_gender_mean))* kernel_gender_mean\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   
\"execution_count\": 107,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"array([[-1.]], dtype=float32)\"\n      ]\n     },\n     \"execution_count\": 107,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"cosine_similarity(direction_female, direction_male)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 114,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Compute mean\\n\",\n    \"kernel_occupation_mean = np.mean(kernel_occupation, axis=0)\\n\",\n    \"# Apply  x - np.sum(np.multiply(x, mean))* mean\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 119,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"accountant:-0.166758477688\\n\",\n      \"acupuncturist:0.0150433778763\\n\",\n      \"architect:-0.106728702784\\n\",\n      \"attorney:-0.0355984941125\\n\",\n      \"chiropractor:-0.112065583467\\n\",\n      \"comedian:-0.17996160686\\n\",\n      \"composer:-0.154989466071\\n\",\n      \"dentist:-0.00389941781759\\n\",\n      \"dietitian:0.00302037596703\\n\",\n      \"dj:-0.156128510833\\n\",\n      \"filmmaker:-0.116180986166\\n\",\n      \"interior_designer:-0.00478803366423\\n\",\n      \"journalist:-0.0217301938683\\n\",\n      \"landscape_architect:-0.00763043016195\\n\",\n      \"magician:-0.00733107328415\\n\",\n      \"massage_therapist:-0.0116159021854\\n\",\n      \"model:0.0549785941839\\n\",\n      \"nurse:0.099561393261\\n\",\n      \"painter:0.0174702480435\\n\",\n      \"paralegal:0.0106164813042\\n\",\n      \"pastor:-0.161623597145\\n\",\n      \"personal_trainer:-0.133440434933\\n\",\n      \"photographer:-0.0985902026296\\n\",\n      \"physician:-0.00131351128221\\n\",\n      \"poet:-0.061441861093\\n\",\n      \"professor:0.00782079994678\\n\",\n      
\"psychologist:0.00208866596222\\n\",\n      \"rapper:-0.112389668822\\n\",\n      \"real_estate_broker:-0.000683411955833\\n\",\n      \"software_engineer:-0.0237298682332\\n\",\n      \"surgeon:-0.0968104675412\\n\",\n      \"teacher:-0.0625882595778\\n\",\n      \"yoga_teacher:0.0292760580778\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"for i in range(33):\\n\",\n    \"#     _bias = np.abs(cosine_similarity(kernel_gender_female, kernel_occupation[i].reshape(1, -1))) + \\\\\\n\",\n    \"#                   np.abs(cosine_similarity(kernel_gender_male, kernel_occupation[i].reshape(1, -1)))\\n\",\n    \"\\n\",\n    \"    _bias = (cosine_similarity(kernel_gender_female - kernel_gender_male, kernel_occupation[i].reshape(1, -1)))\\n\",\n    \"\\n\",\n    \"#     _bias = cosine_similarity(direction_male, kernel_occupation[i].reshape(1, -1))\\n\",\n    \"\\n\",\n    \"#     x = kernel_occupation[i].reshape(1, -1)\\n\",\n    \"#     x = x - np.sum(np.multiply(x, kernel_occupation_mean))* kernel_occupation_mean\\n\",\n    \"#     _bias = cosine_similarity(direction_male, x)\\n\",\n    \"\\n\",\n    \"    print ('{}:{}'.format(TITLE_LABELS[i], float(_bias)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 2\",\n   \"language\": \"python\",\n   \"name\": \"python2\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 2\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython2\",\n   \"version\": \"2.7.14+\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "model_evaluation/README.md",
    "content": "# Evaluation Pipeline for Text classification models.\n\nThis directory contains utilities to use a model deployed on cloud MLE (in 'utils_export/'), and some notebooks to illustrate a typical evaluation pipeline.\n\n## Environment Setup\n\n### Python Dependencies\n\nInstall library dependencies (it is optional, but recommended to install these\nin a [Virtual Environment](https://docs.python.org/3/tutorial/venv.html):\n\n```shell\n# The python2 way to create and use virtual environment\n# (optional, but recommended):\nvirtualenv .pyenv\nsource .pyenv/bin/activate\n# Install dependencies\npip install -r requirements.txt\n\njupyter notebook\n\n# ... do stuff ...\n\n# Exit your virtual environment.\ndeactivate\n```\n\n### Google Cloud Storage dependencies\n\nIf you need to access data located in Google Cloud Storage, you must [install the Google Cloud SDK](https://cloud.google.com/sdk/docs/) and initialize it within your virtual environment.\n\n## Evaluating one model\n\nThe notebook `jigsaw_evaluation_pipeline.ipynb` provides a example of running on evaluation metrics for the ml-fairness project.\n\nWe use the `Dataset` and `Model` utilities from `utils_export/` to interact with the models deployed on CMLE and execute the following steps:\n * Load two datasets: 1 dataset to evaluate performance (or intended bias) similar to the training data, and 1 dataset to evaluate the unintended bias that includes identity information.\n * Run the model on each dataset and collect the predictions.\n * Compute evaluation metrics: AUC on the first dataset, pinned_auc on the second one.\n\n\n## Evaluating several models\n\nThis is useful to compare different training runs (with different parameters) but also to compare the evaluation metrics during the training run (several models exported during 1 training run).\n\nTODO(fprost): Write description once the notebook is pushed\n\n\n## Cloud MLE utilities\n\nThe utility library `utils_export/` intends to simplify the 
use of CMLE deployed models.\n\n### Typical usage pattern\n\nThis library will handle the following \"overhead\" tasks:\n * Convert your pandas `DataFrame` into tf-records, adding an `example_key` to each example.\n * Send an HTTP request to CMLE to run a batch prediction job.\n * Wait for job completion.\n * Parse prediction files and join results with the initial `DataFrame` based on `example_key`.\n\n\n```python\ninput_fn = ... (returns pandas DataFrame).\ndataset = Dataset(input_fn, dataset_dir)\n\ndataset.load_data(10000)\n\nmodel = Model(...)\ndataset.add_model_prediction_to_data(model)\nOR\ndataset.add_model_prediction_to_data(model, recompute_predictions=False)\n\ndataset.show_data()\n```\n\n### `Model`\n\nA `Model` instance describes the key components of a CMLE model.\n\nKey parameters are:\n * how to access the model: project_name, model_names.\n * what the expected inputs to the models are and their respective types (see EncodingFeatureSpec). The types are important to find the right encoding function for TF-records.\n * what the model outputs are.\n\nExample:\n```python\nmodel = Model(\n    feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},\n    prediction_keys='prediction_key',\n    model_names=['model_name1:version1', 'model_name1:version2', 'model_name2:version1'],\n    project_name='wikidetox')\n```\n\n\n### `Dataset`\n\nA `Dataset` instance is related to a pandas `DataFrame` and will be progressively augmented with the model predictions.\n\nThe dataset attributes are:\n * `input_fn`: a function that returns a `DataFrame` (input_data).\n * `DATASET_DIR`: where to save/load all the files associated with the `Dataset`, in particular input_tf_records and cloud mle predictions.\n"
  },
  {
    "path": "model_evaluation/deploy_models.sh",
    "content": "#!/bin/bash\n\nMODEL_DIRS='gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103329/model_dir,'\\\n'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103300/model_dir,'\\\n'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103254/model_dir,'\\\n'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103245/model_dir,'\\\n'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103232/model_dir,'\\\n'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103209/model_dir,'\\\n'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103152/model_dir,'\\\n'gs://conversationai-models/tf_trainer_runs/fprost/tf_gru_attention_multiclass_biosbias_glove/20190328_103117/model_dir'\n\n\npython utils_export/deploy_list_models.py --list_model_dir=$MODEL_DIRS --model_name 'tf_test_fprost'\n"
  },
  {
    "path": "model_evaluation/few_shot_learning_baseline_evaluation.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%load_ext autoreload\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%autoreload 2\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from __future__ import absolute_import\\n\",\n    \"from __future__ import division\\n\",\n    \"from __future__ import print_function\\n\",\n    \"\\n\",\n    \"import getpass\\n\",\n    \"from IPython.display import display\\n\",\n    \"import json\\n\",\n    \"import nltk\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas as pd\\n\",\n    \"import pkg_resources\\n\",\n    \"import os\\n\",\n    \"import random\\n\",\n    \"import re\\n\",\n    \"import seaborn as sns\\n\",\n    \"import sklearn.metrics as metrics\\n\",\n    \"\\n\",\n    \"import tensorflow as tf\\n\",\n    \"from tensorflow.python.lib.io import file_io\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from utils_export.dataset import Dataset, Model\\n\",\n    \"from utils_export import utils_cloudml\\n\",\n    \"from utils_export import utils_tfrecords\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Faster to access GCS file:\\n\",\n    \"# https://github.com/tensorflow/tensorflow/issues/15530\\n\",\n    \"os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"[nltk_data] Downloading package punkt to 
/Users/msushkov/nltk_data...\\n\",\n      \"[nltk_data]   Package punkt is already up-to-date!\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"True\"\n      ]\n     },\n     \"execution_count\": 6,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"nltk.download('punkt')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def tokenizer(text, lowercase=True):\\n\",\n    \"  \\\"\\\"\\\"Converts text to a list of words.\\n\",\n    \"\\n\",\n    \"  Args:\\n\",\n    \"    text: piece of text to tokenize (string).\\n\",\n    \"    lowercase: whether to include lowercasing in preprocessing (bool).\\n\",\n    \"\\n\",\n    \"  Returns:\\n\",\n    \"    A list of strings (words).\\n\",\n    \"  \\\"\\\"\\\"\\n\",\n    \"  words = nltk.word_tokenize(text.decode('utf-8'))\\n\",\n    \"  if lowercase:\\n\",\n    \"    words = [w.lower() for w in words]\\n\",\n    \"  return words\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def make_test_input_fn(dataset_path,\\n\",\n    \"                       model_text_feature,\\n\",\n    \"                       dataset_text_feature,\\n\",\n    \"                       data_label,\\n\",\n    \"                       tokenizer_fn,\\n\",\n    \"                       label_data_type=tf.float32,\\n\",\n    \"                       max_n_examples=None,\\n\",\n    \"                       random_filter_keep_rate=1.0):\\n\",\n    \"    \\\"\\\"\\\"Returns a test input function.\\n\",\n    \"    \\n\",\n    \"    Args:\\n\",\n    \"      dataset_path (str): Path to dataset.\\n\",\n    \"      model_text_feature (str): The feature column corresponding to the\\n\",\n    \"        text input the model expects.\\n\",\n    \"      dataset_text_feature (str): The name of the 
text feature of the dataset.\\n\",\n    \"      data_label (str): The output label for the dataset.\\n\",\n    \"      tokenizer_fn: Tokenizer function (str -> list).\\n\",\n    \"      max_n_examples (int): How many examples to evaluate on.\\n\",\n    \"      random_filter_keep_rate (float): Filter out test examples with this probability.\\n\",\n    \"\\n\",\n    \"    Returns:\\n\",\n    \"      Test input function.\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    decoding_input_features = {\\n\",\n    \"      dataset_text_feature: tf.FixedLenFeature([], dtype=tf.string),\\n\",\n    \"      data_label: tf.FixedLenFeature([], dtype=label_data_type)\\n\",\n    \"    }\\n\",\n    \"\\n\",\n    \"    def test_input_fn(max_n_examples=max_n_examples,\\n\",\n    \"                      random_filter_keep_rate=random_filter_keep_rate):\\n\",\n    \"        \\\"\\\"\\\"Test input function.\\n\",\n    \"        \\n\",\n    \"        Args:\\n\",\n    \"          max_n_examples (int): How many examples to evaluate on.\\n\",\n    \"          random_filter_keep_rate (float): Filter out test examples with this probability.\\n\",\n    \"          \\n\",\n    \"        Returns:\\n\",\n    \"          DataFrame with the results.\\n\",\n    \"        \\\"\\\"\\\"\\n\",\n    \"        res = utils_tfrecords.decode_tf_records_to_pandas(\\n\",\n    \"            decoding_input_features,\\n\",\n    \"            dataset_path,\\n\",\n    \"            max_n_examples,\\n\",\n    \"            random_filter_keep_rate)\\n\",\n    \"        if not tokenizer_fn:\\n\",\n    \"            tok = lambda x: [x]\\n\",\n    \"            res[model_text_feature] = list(map(tok, res[dataset_text_feature]))\\n\",\n    \"        else:\\n\",\n    \"            res[model_text_feature] = list(map(tokenizer_fn, res[dataset_text_feature]))\\n\",\n    \"        res = res.rename(columns={ data_label: 'label' })\\n\",\n    \"        res['label'] = list(map(lambda x: bool(round(x)), list(res['label'])))\\n\",\n    
\"        final = res.copy(deep=True)\\n\",\n    \"        return final\\n\",\n    \"\\n\",\n    \"    return test_input_fn\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def print_results(results_df, model_names, print_pr_curve=False):\\n\",\n    \"    \\\"\\\"\\\"Print the classification results.\\n\",\n    \"    \\n\",\n    \"    Args:\\n\",\n    \"      results_df: DataFrame with the results.\\n\",\n    \"      model_names: List of strings representing the models for which we have results.\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    labels = results_df['label']\\n\",\n    \"    for _model in model_names:\\n\",\n    \"        print(_model)\\n\",\n    \"        model_preds = results_df[_model]\\n\",\n    \"        fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)\\n\",\n    \"        roc_auc = metrics.auc(fpr, tpr)\\n\",\n    \"        recalls, precisions, thr = metrics.precision_recall_curve(labels, model_preds)\\n\",\n    \"        pr_auc = metrics.auc(precisions, recalls)\\n\",\n    \"        model_preds_binary = (model_preds > 0.5).astype(np.int_)\\n\",\n    \"        f1 = metrics.f1_score(labels, model_preds_binary)\\n\",\n    \"        print('\\\\tROC AUC: {}'.format(roc_auc))\\n\",\n    \"        print('\\\\tPR AUC: {}'.format(pr_auc))\\n\",\n    \"        print('\\\\tF1: {}'.format(f1))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 10,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"PROJECT_NAME = 'conversationai-models'\\n\",\n    \"SENTENCE_KEY = 'comment_key' #Input key\\n\",\n    \"\\n\",\n    \"# Pattern for path of tf_records\\n\",\n    \"OUTPUT_DIR_BASE = os.path.join(\\n\",\n    \"    'gs://conversationai-models',\\n\",\n    \"    getpass.getuser(),\\n\",\n    \"    'tfrecords')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Evaluate 
models on Civil Comments dataset\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 175,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"LABEL_NAME_PREDICTION_MODEL = 'toxicity/logistic'\\n\",\n    \"DATASET = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/test-*.tfrecord'\\n\",\n    \"DATA_LABEL = 'toxicity'\\n\",\n    \"DATASET_TEXT_FEATURE='comment_text'\\n\",\n    \"\\n\",\n    \"# Pattern for path of tf_records\\n\",\n    \"OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'civil_comments_test')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### CNN, GRU Attention Models\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 140,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'tokens'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_cnn_civil_comments_glove:v_20190219_185541',\\n\",\n    \"    'tf_gru_attention_civil_comments_glove:v_20190219_185619',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, tokenizer)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, 
OUTPUT_DIR)\\n\",\n    \"test_dataset.load_data(10000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 143,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"civil_comments_test_df = test_dataset.show_data()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 144,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_cnn_civil_comments_glove:v_20190219_185541\\n\",\n      \"\\tROC AUC: 0.9573435242534393\\n\",\n      \"\\tPR AUC: 0.6729934425219886\\n\",\n      \"tf_gru_attention_civil_comments_glove:v_20190219_185619\\n\",\n      \"\\tROC AUC: 0.9649161132104584\\n\",\n      \"\\tPR AUC: 0.7486011745102973\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(civil_comments_test_df, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### TF-Hub Model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 189,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'text'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    
model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, None)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\\n\",\n    \"test_dataset.load_data(10000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 193,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"civil_comments_hub_df = test_dataset.show_data()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 194,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552\\n\",\n      \"\\tROC AUC: 0.9595451744696132\\n\",\n      \"\\tPR AUC: 0.7429338592289392\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(civil_comments_hub_df, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Evaluate models on Toxicity dataset\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 195,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"LABEL_NAME_PREDICTION_MODEL = 'frac_neg/logistic'\\n\",\n    \"DATASET = 
'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'\\n\",\n    \"DATA_LABEL = 'frac_neg'\\n\",\n    \"DATASET_TEXT_FEATURE='comment_text'\\n\",\n    \"\\n\",\n    \"# Pattern for path of tf_records\\n\",\n    \"OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'toxicity_test')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### CNN, GRU Attention Models\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 162,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'tokens'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_cnn_toxicity_glove:v_20190219_185532',\\n\",\n    \"    'tf_gru_attention_toxicity_glove:v_20190219_185516',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, tokenizer)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\\n\",\n    \"test_dataset.load_data(10000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are 
available.\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 149,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"toxicity_test_df1 = test_dataset.show_data()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 150,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_cnn_toxicity_glove:v_20190219_185532\\n\",\n      \"\\tROC AUC: 0.951760553925346\\n\",\n      \"\\tPR AUC: 0.8740274773143215\\n\",\n      \"tf_gru_attention_toxicity_glove:v_20190219_185516\\n\",\n      \"\\tROC AUC: 0.9543916575133977\\n\",\n      \"\\tPR AUC: 0.8814208812923074\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(toxicity_test_df1, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### TF-Hub Model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 196,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'text'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, None)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   
\"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\\n\",\n    \"test_dataset.load_data(10000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 200,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"toxicity_test_df2 = test_dataset.show_data()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 201,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427\\n\",\n      \"\\tROC AUC: 0.9270843170934745\\n\",\n      \"\\tPR AUC: 0.8155815559085313\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(toxicity_test_df2, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Evaluate models on Many Communities dataset (full)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 23,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"LABEL_NAME_PREDICTION_MODEL = 'removed/logistic'\\n\",\n    \"DATASET = 'gs://conversationai-models/resources/transfer_learning_data/many_communities/20181105_answers_all_columns_nthain.tfrecord'\\n\",\n    \"DATA_LABEL = 'removed'\\n\",\n    \"DATASET_TEXT_FEATURE='comment_text'\\n\",\n    \"\\n\",\n    \"# Pattern for path of tf_records\\n\",\n    \"OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'many_communities_test')\"\n   
]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### CNN, GRU Attention Models\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 14,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'tokens'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_cnn_many_communities_glove:v_20190219_185551_gpu_p100_4',\\n\",\n    \"    #'tf_gru_attention_many_communities:v20190322_142800_507893_1556085643',\\n\",\n    \"    #'tf_gru_attention_many_communities:v20190315_161037_23271_1555129264',\\n\",\n    \"    'tf_gru_attention_many_communities:v20190705_004839_507000_1562364428_gpu_p100_4',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, tokenizer, label_data_type=tf.int64)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\\n\",\n    \"test_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    
\"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 318,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"many_communities_test_df = test_dataset.show_data()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 319,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_cnn_many_communities_glove:v_20190219_185551\\n\",\n      \"\\tROC AUC: 0.7476941464055139\\n\",\n      \"\\tPR AUC: 0.07604839414024091\\n\",\n      \"tf_gru_attention_many_communities:v20190315_161037_23271_1555129264\\n\",\n      \"\\tROC AUC: 0.7215269560475308\\n\",\n      \"\\tPR AUC: 0.06656538517176142\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(many_communities_test_df, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### TF-Hub Model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 24,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'text'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_hub_classifier_many_communities:v20190219_185602_316000_1553563221_gpu_v100_4',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, None, label_data_type=tf.int64)\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": true\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR)\\n\",\n    \"test_dataset.load_data(10000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 16,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"many_communities_tfhub_test_df = test_dataset.show_data()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print_results(many_communities_tfhub_test_df, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Evaluate models on Many Communities subset (adapted for few-shot learning)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 22,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"LABEL_NAME_PREDICTION_MODEL = 'label/logistic'\\n\",\n    \"DATASET_VALID = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord'\\n\",\n    \"DATASET_TEST = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord'\\n\",\n    \"DATA_LABEL = 'label'\\n\",\n    \"DATASET_TEXT_FEATURE='text'\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Pessimistic\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": 20,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Pattern for path of tf_records\\n\",\n    \"OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/valid')\\n\",\n    \"OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/test')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### CNN, GRU Attention Models\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 23,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'tokens'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1',\\n\",\n    \"    'tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"valid_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, tokenizer, label_data_type=tf.int64)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, tokenizer, label_data_type=tf.int64)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be 
able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\\n\",\n    \"valid_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\\n\",\n    \"test_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 27,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8233381391772395\\n\",\n      \"\\tPR AUC: 0.8062951511107903\\n\",\n      \"\\tF1: 0.7607565011820331\\n\",\n      \"tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8303615196078432\\n\",\n      \"\\tPR AUC: 0.8125045070656154\\n\",\n      \"\\tF1: 0.7703703703703705\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(valid_dataset.show_data(), MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 25,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": 
[\n      \"tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.7981477681641835\\n\",\n      \"\\tPR AUC: 0.7900106468171257\\n\",\n      \"\\tF1: 0.7378091872791519\\n\",\n      \"tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8074846866462235\\n\",\n      \"\\tPR AUC: 0.7951370231895221\\n\",\n      \"\\tF1: 0.7507100720996286\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(test_dataset.show_data(), MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### TF-Hub Model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 26,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'text'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"valid_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, None, label_data_type=tf.int64)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, None, label_data_type=tf.int64)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": 
[],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\\n\",\n    \"valid_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\\n\",\n    \"test_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 30,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8612435121107267\\n\",\n      \"\\tPR AUC: 0.851153195076283\\n\",\n      \"\\tF1: 0.7937575030012005\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(valid_dataset.show_data(), MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 31,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1\\n\",\n      \"\\tROC AUC: 
0.8434673869262717\\n\",\n      \"\\tPR AUC: 0.8326080326940988\\n\",\n      \"\\tF1: 0.779380468195791\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(test_dataset.show_data(), MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Optimistic\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 14,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Pattern for path of tf_records\\n\",\n    \"OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/valid')\\n\",\n    \"OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/test')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### CNN, GRU Attention Models\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 15,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'tokens'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1',\\n\",\n    \"    'tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"valid_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, tokenizer, label_data_type=tf.int64)\\n\",\n    \"\\n\",\n    \"test_input_fn = 
make_test_input_fn(\\n\",\n    \"    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, tokenizer, label_data_type=tf.int64)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\\n\",\n    \"valid_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\\n\",\n    \"test_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 37,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8304709727028066\\n\",\n      \"\\tPR AUC: 0.8191225889787218\\n\",\n      \"\\tF1: 0.7564259485924112\\n\",\n      \"tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8293254998077663\\n\",\n      \"\\tPR AUC: 
0.8181913933482414\\n\",\n      \"\\tF1: 0.7652214022140222\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(valid_dataset.show_data(), MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 38,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8043942295635125\\n\",\n      \"\\tPR AUC: 0.79754755517453\\n\",\n      \"\\tF1: 0.7305737109658679\\n\",\n      \"tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8156875904836816\\n\",\n      \"\\tPR AUC: 0.8081941065311745\\n\",\n      \"\\tF1: 0.7558876811594204\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(test_dataset.show_data(), MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### TF-Hub Model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 18,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_TEXT_FEATURE = 'text'\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1',\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"model_input_spec = {\\n\",\n    \"    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\\n\",\n    \"\\n\",\n    \"valid_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \" 
   DATA_LABEL, None, label_data_type=tf.int64)\\n\",\n    \"\\n\",\n    \"test_input_fn = make_test_input_fn(\\n\",\n    \"    DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,\\n\",\n    \"    DATA_LABEL, None, label_data_type=tf.int64)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)\\n\",\n    \"valid_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"random.seed(2018)\\n\",\n    \"\\n\",\n    \"test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)\\n\",\n    \"test_dataset.load_data(100000000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\\n\",\n    \"test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 22,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8680750192233757\\n\",\n      \"\\tPR AUC: 0.8623373414090059\\n\",\n      \"\\tF1: 0.7900994904149479\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(valid_dataset.show_data(), MODEL_NAMES)\"\n   
]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 23,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1\\n\",\n      \"\\tROC AUC: 0.8526337876041631\\n\",\n      \"\\tPR AUC: 0.8481017558154519\\n\",\n      \"\\tF1: 0.784984556901877\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print_results(test_dataset.show_data(), MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Finetuned\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 11,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import csv\\n\",\n    \"import matplotlib.pyplot as plt\\n\",\n    \"from sklearn.utils import fixes\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 12,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def get_list_results_files(parent_dir):\\n\",\n    \"    \\\"\\\"\\\"Gets the paths of all results files that are in parent_dir.\\\"\\\"\\\"\\n\",\n    \"    file_list = []\\n\",\n    \"    for subdirectory, _, files in tf.gfile.Walk(parent_dir):\\n\",\n    \"        [file_list.append(os.path.join(parent_dir, fname)) for fname in files]\\n\",\n    \"    return file_list\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 22,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def load_csv_predictions(pred_file, is_test=False):\\n\",\n    \"    \\\"\\\"\\\"Load the CSV file with predictions and labels.\\\"\\\"\\\"\\n\",\n    \"    model_predictions = None\\n\",\n    \"    labels = None\\n\",\n    \"    communities = None\\n\",\n    \"    names = ['label', 'pred', 'community']\\n\",\n    \"    if is_test:\\n\",\n    \"        names = ['community', 'label', 'pred']\\n\",\n    \"    with 
file_io.FileIO(pred_file, 'r') as f:\\n\",\n    \"        df = pd.read_csv(f, header=None, names=names)\\n\",\n    \"        labels = df['label'].values\\n\",\n    \"        model_predictions = df['pred'].values\\n\",\n    \"        communities = df['community'].values\\n\",\n    \"    return labels, model_predictions, communities\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 14,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def plot_pr_curve(precisions, recalls, identifier=None):\\n\",\n    \"    \\\"\\\"\\\"Plots the Precision/Recall curve.\\n\",\n    \"    Args:\\n\",\n    \"      precisions: Precisions at all score thresholds.\\n\",\n    \"      recalls: Recalls at all score thresholds.\\n\",\n    \"      identifier: Optional string indicating what this curve is.\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    precision_recall_auc = metrics.auc(recalls, precisions)\\n\",\n    \"    plt.figure()\\n\",\n    \"    step_kwargs = ({\\n\",\n    \"        'step': 'post'\\n\",\n    \"    } if 'step' in fixes.signature(plt.fill_between).parameters else {})\\n\",\n    \"    plt.step(recalls, precisions, color='b', alpha=0.2, where='post')\\n\",\n    \"    plt.fill_between(recalls, precisions, alpha=0.2, color='b', **step_kwargs)\\n\",\n    \"    plt.xlabel('Recall')\\n\",\n    \"    plt.ylabel('Precision')\\n\",\n    \"    plt.ylim([0.0, 1.05])\\n\",\n    \"    plt.xlim([0.0, 1])\\n\",\n    \"    if identifier:\\n\",\n    \"        plt.title('PR curve for %s (AUC = %.2f).' % (\\n\",\n    \"            identifier, precision_recall_auc))\\n\",\n    \"    else:\\n\",\n    \"        plt.title('PR curve (AUC = %.2f).' 
% precision_recall_auc)\\n\",\n    \"    plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 25,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def compute_metrics_from_dir(results_dir, is_test=False):\\n\",\n    \"    files = get_list_results_files(results_dir)\\n\",\n    \"    for file_path in files:\\n\",\n    \"        curr_trial_name = os.path.basename(file_path)\\n\",\n    \"        print(curr_trial_name)\\n\",\n    \"        labels, model_preds, communities = load_csv_predictions(file_path, is_test)\\n\",\n    \"        fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)\\n\",\n    \"        roc_auc = metrics.auc(fpr, tpr)\\n\",\n    \"        precisions, recalls, thr = metrics.precision_recall_curve(labels, model_preds)\\n\",\n    \"        pr_auc = metrics.auc(recalls, precisions)\\n\",\n    \"        model_preds_binary = (model_preds > 0.5).astype(np.int_)\\n\",\n    \"        f1 = metrics.f1_score(labels, model_preds_binary)\\n\",\n    \"        print('\\\\tROC AUC: {}'.format(roc_auc))\\n\",\n    \"        print('\\\\tPR AUC: {}'.format(pr_auc))\\n\",\n    \"        print('\\\\tF1: {}'.format(f1))\\n\",\n    \"        plot_pr_curve(precisions, recalls, curr_trial_name)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Validation\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 16,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"TF_CNN_VALID_RESULTS_DIR = \\\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/validation\\\"\\n\",\n    \"TF_GRU_VALID_RESULTS_DIR = \\\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/validation\\\"\\n\",\n    \"TF_HUB_VALID_RESULTS_DIR = 
\\\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/validation\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"compute_metrics_from_dir(TF_CNN_VALID_RESULTS_DIR)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"compute_metrics_from_dir(TF_GRU_VALID_RESULTS_DIR)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"compute_metrics_from_dir(TF_HUB_VALID_RESULTS_DIR)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Test\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 26,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"TF_CNN_TEST_RESULTS_DIR = \\\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/test\\\"\\n\",\n    \"TF_GRU_TEST_RESULTS_DIR = \\\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/test\\\"\\n\",\n    \"TF_HUB_TEST_RESULTS_DIR = \\\"gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/test\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"compute_metrics_from_dir(TF_CNN_TEST_RESULTS_DIR, is_test=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"compute_metrics_from_dir(TF_GRU_TEST_RESULTS_DIR, is_test=False)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   
\"outputs\": [],\n   \"source\": [\n    \"compute_metrics_from_dir(TF_HUB_TEST_RESULTS_DIR)\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.5\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "model_evaluation/input_fn_example.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Defines some examples of input_fn for the evaluation notebook.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport json\nimport numpy as np\nimport pandas as pd\nimport pkg_resources\nimport os\nimport random\nimport re\n\nimport tensorflow as tf\nfrom tensorflow.python.lib.io import file_io\n\nfrom unintended_ml_bias import model_bias_analysis\nfrom utils_export import utils_tfrecords\n\n#Faster to access GCS file + https://github.com/tensorflow/tensorflow/issues/15530\nos.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'\n\n#TODO(fprost): Clean this file.\n\n#### #### #### #### #### ####\n#### PERFORMANCE DATASET ####\n#### #### #### #### #### ####\n\n\ndef create_input_fn_toxicity_performance(tokenizer, model_input_comment_field):\n  \"\"\"Generates an input_fn to evaluate model performance on toxicity dataset.\"\"\"\n\n  TOXICITY_PERFORMANCE_DATASET = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'\n  TOXICITY_DATA_LABEL = 'frac_neg'  #Name of the label in the dataset\n  TOXICITY_COMMENT_NAME = 'comment_text'  #Name of the comment in the dataset\n\n  # DECODING\n  decoding_input_features = {\n      TOXICITY_COMMENT_NAME: 
tf.FixedLenFeature([], dtype=tf.string),\n      TOXICITY_DATA_LABEL: tf.FixedLenFeature([], dtype=tf.float32)\n  }\n\n  def input_fn_performance_toxicity(max_n_examples=None,\n                                    random_filter_keep_rate=1.0):\n    res = utils_tfrecords.decode_tf_records_to_pandas(\n        decoding_input_features, TOXICITY_PERFORMANCE_DATASET, max_n_examples,\n        random_filter_keep_rate)\n    res[model_input_comment_field] = list(\n        map(tokenizer, res[TOXICITY_COMMENT_NAME]))\n    res = res.rename(columns={TOXICITY_DATA_LABEL: 'label'})\n    res['label'] = list(map(lambda x: bool(round(x)), list(res['label'])))\n    final = res.copy(deep=True)\n    return final\n\n  return input_fn_performance_toxicity\n\n\n#### #### #### #### ####\n#### CIVIL DATASET  ####\n#### #### #### #### ####\n\nCIVIL_COMMENTS_PATH = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/eval-00000-of-00003.tfrecord'\nTHRESHOLD_BIAS_CIVIL = 0.5\n\ncivil_comments_spec = {\n    'comment_text':\n        tf.FixedLenFeature([], dtype=tf.string),\n    'id':\n        tf.FixedLenFeature([], dtype=tf.string),\n    'toxicity':\n        tf.FixedLenFeature([], dtype=tf.float32),\n    'severe_toxicity':\n        tf.FixedLenFeature([], dtype=tf.float32),\n    'obscene':\n        tf.FixedLenFeature([], dtype=tf.float32),\n    'sexual_explicit':\n        tf.FixedLenFeature([], dtype=tf.float32),\n    'identity_attack':\n        tf.FixedLenFeature([], dtype=tf.float32),\n    'insult':\n        tf.FixedLenFeature([], dtype=tf.float32),\n    'threat':\n        tf.FixedLenFeature([], dtype=tf.float32),\n    'toxicity_annotator_count':\n        tf.FixedLenFeature([], dtype=tf.int64),\n    'identity_annotator_count':\n        tf.FixedLenFeature([], dtype=tf.int64),\n    'male':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'female':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'transgender':\n        
tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'other_gender':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'heterosexual':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'homosexual_gay_or_lesbian':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'bisexual':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'other_sexual_orientation':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'christian':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'jewish':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'muslim':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'hindu':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'buddhist':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'atheist':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'other_religion':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'black':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'white':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'asian':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'latino':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'other_race_or_ethnicity':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'physical_disability':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'intellectual_or_learning_disability':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'psychiatric_or_mental_illness':\n        tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.),\n    'other_disability':\n        tf.FixedLenFeature([], dtype=tf.float32, 
default_value=-1.),\n}\n\nidentity_terms_civil = [\n    'male', 'female', 'transgender', 'other_gender', 'heterosexual',\n    'homosexual_gay_or_lesbian', 'bisexual', 'other_sexual_orientation',\n    'christian', 'jewish', 'muslim', 'hindu', 'buddhist', 'atheist',\n    'other_religion', 'black', 'white', 'asian', 'latino',\n    'other_race_or_ethnicity', 'physical_disability',\n    'intellectual_or_learning_disability', 'psychiatric_or_mental_illness',\n    'other_disability'\n]\n\nCIVIL_COMMENT_NAME = 'comment_text'\n\n\ndef create_input_fn_civil_performance(tokenizer, model_input_comment_field):\n  \"\"\"Generates an input_fn to evaluate model performance on civil dataset.\"\"\"\n\n  def input_fn_performance_civil(max_n_examples=None,\n                                 random_filter_keep_rate=1.0):\n    civil_df_raw = utils_tfrecords.decode_tf_records_to_pandas(\n        civil_comments_spec,\n        CIVIL_COMMENTS_PATH,\n        max_n_examples=max_n_examples,\n        random_filter_keep_rate=random_filter_keep_rate,\n    )\n    civil_df_raw[CIVIL_COMMENT_NAME] = list(\n        map(tokenizer, civil_df_raw[CIVIL_COMMENT_NAME]))\n    civil_df_raw['toxicity'] = list(\n        map(lambda x: bool(round(x)), list(civil_df_raw['toxicity'])))\n    civil_df_raw = civil_df_raw.rename(columns={\n        CIVIL_COMMENT_NAME: model_input_comment_field,\n        'toxicity': 'label'\n    })\n    res = civil_df_raw.copy(deep=True)\n    return res\n\n  return input_fn_performance_civil\n\n\ndef create_input_fn_civil_bias(tokenizer, model_input_comment_field):\n  \"\"\"\"Generates an input_fn to evaluate model bias on civil dataset.\n\n  Construction of this database such as:\n      We keep only examples that have identity labels (with rule: male >=0).\n      We apply the 'threshold_bias_civil' for each identity field.\n      We select x% of the \"background\", i.e. 
examples that are 0 for each\n      identity.\n\n  Indeed, as the background is dominant, we want to reduce the size of the test\n  set.\n  \"\"\"\n\n  def filter_fn_civil(example, background_filter_keep_rate=0.1):\n    if example['male'] < 0.:\n      return False\n    contains_one_identity = False\n    for _term in identity_terms_civil:\n      if example[_term] >= THRESHOLD_BIAS_CIVIL:\n        contains_one_identity = True\n    if contains_one_identity:\n      return True\n    else:\n      return (random.random() < background_filter_keep_rate)\n\n  def input_fn_bias_civil(max_n_examples=None):\n    civil_df_raw = utils_tfrecords.decode_tf_records_to_pandas(\n        civil_comments_spec,\n        CIVIL_COMMENTS_PATH,\n        max_n_examples=max_n_examples,\n        filter_fn=filter_fn_civil,\n    )\n    civil_df_raw[CIVIL_COMMENT_NAME] = list(\n        map(tokenizer, civil_df_raw[CIVIL_COMMENT_NAME]))\n    for _term in identity_terms_civil:\n      civil_df_raw[_term] = list(\n          map(lambda x: x >= THRESHOLD_BIAS_CIVIL, list(civil_df_raw[_term])))\n    civil_df_raw['toxicity'] = list(\n        map(lambda x: bool(round(x)), list(civil_df_raw['toxicity'])))\n    civil_df_raw = civil_df_raw.rename(columns={\n        CIVIL_COMMENT_NAME: model_input_comment_field,\n        'toxicity': 'label'\n    })\n    res = civil_df_raw.copy(deep=True)\n    return res\n\n  return input_fn_bias_civil\n\n\n#### #### #### #### #### ####\n####  SYNTHETIC DATASET  ####\n#### #### #### #### #### ####\n\n\ndef create_input_fn_artificial_bias(tokenizer, model_input_comment_field):\n  \"\"\"Generates an input_fn to evaluate model bias on synthetic dataset.\"\"\"\n\n  def input_fn_bias(max_n_examples):\n\n    # Loading it from the unintended_ml_bias github.\n    entire_test_bias_df = pd.read_csv(\n        pkg_resources.resource_stream('unintended_ml_bias',\n                                      'eval_datasets/bias_madlibs_77k.csv'))\n    entire_test_bias_df['raw_text'] = 
entire_test_bias_df['Text']\n    entire_test_bias_df['label'] = entire_test_bias_df['Label']\n    entire_test_bias_df['label'] = list(\n        map(lambda x: x == 'BAD', entire_test_bias_df['label']))\n    entire_test_bias_df = entire_test_bias_df[['raw_text', 'label']].copy()\n    identity_terms_synthetic = [\n        line.strip() for line in pkg_resources.resource_stream(\n            'unintended_ml_bias', 'bias_madlibs_data/adjectives_people.txt')\n    ]\n    model_bias_analysis.add_subgroup_columns_from_text(\n        entire_test_bias_df, 'raw_text', identity_terms_synthetic)\n\n    # Add preprocessing\n    entire_test_bias_df['text'] = list(\n        map(tokenizer, entire_test_bias_df['raw_text']))\n    if max_n_examples:\n      res = entire_test_bias_df.sample(n=max_n_examples, random_state=2018)\n    else:\n      res = entire_test_bias_df\n    res = res.copy(deep=True)\n    res = res.rename(columns={'raw_text': model_input_comment_field})\n    return res\n\n  return input_fn_bias\n\n#### #### #### #### #### ####\n####  BIASBIOS DATASET   ####\n#### #### #### #### #### ####\n\nBIASBIOS_PATH = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190225173815/test*.tfrecord'\nSCRUBBED_BIASBIOS_PATH = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190225173815_scrubbed/test*.tfrecord'\n\ncomments_spec = {\n    'comment_text':\n        tf.FixedLenFeature([], dtype=tf.string),\n    'gender':\n        tf.FixedLenFeature([], dtype=tf.string),\n    'title':\n        tf.FixedLenFeature([], dtype=tf.int64)\n}\n\nidentity_terms = [\n    'gender'\n]\n\nCOMMENT_NAME = 'comment_text'\nLABEL_NAME = 'title'\n\n\ndef create_input_fn_biasbios(tokenizer, model_input_comment_field, scrubbed=False):\n  \"\"\"\"Generates an input_fn to evaluate model bias on biasbios dataset.\n  \"\"\"\n\n  def filter_fn_biasbios(example, background_filter_keep_rate=1.0):\n    return (random.random() < background_filter_keep_rate)\n\n  def 
input_fn_biasbios(max_n_examples=None, random_filter_keep_rate=1.0):\n    # NOTE(review): random_filter_keep_rate is accepted but never used below.\n    if scrubbed:\n      path = SCRUBBED_BIASBIOS_PATH\n    else:\n      path = BIASBIOS_PATH\n    df_raw = utils_tfrecords.decode_tf_records_to_pandas(\n        comments_spec,\n        path,\n        max_n_examples=max_n_examples,\n        filter_fn=filter_fn_biasbios,\n    )\n    df_raw[COMMENT_NAME] = list(\n        map(tokenizer, df_raw[COMMENT_NAME]))\n    #for _term in identity_terms:\n    #  df_raw[_term] = list(df_raw[_term])\n    #df_raw[LABEL_NAME] = list(df_raw[LABEL_NAME])\n    df_raw = df_raw.rename(columns={\n        COMMENT_NAME: model_input_comment_field,\n        LABEL_NAME: 'label'\n    })\n    res = df_raw.copy(deep=True)\n    return res\n\n  return input_fn_biasbios\n\n#### #### #### #### #### ####\n####  SYNTHETIC DATASET  ####\n#### #### #### #### #### ####\n\n\ndef create_input_fn_artificial_bias(tokenizer, model_input_comment_field):\n  \"\"\"Generates an input_fn to evaluate model bias on synthetic dataset.\"\"\"\n\n  def input_fn_bias(max_n_examples):\n\n    # Loading it from the unintended_ml_bias github.\n    entire_test_bias_df = pd.read_csv(\n        pkg_resources.resource_stream('unintended_ml_bias',\n                                      'eval_datasets/bias_madlibs_77k.csv'))\n    entire_test_bias_df['raw_text'] = entire_test_bias_df['Text']\n    entire_test_bias_df['label'] = entire_test_bias_df['Label']\n    entire_test_bias_df['label'] = list(\n        map(lambda x: x == 'BAD', entire_test_bias_df['label']))\n    entire_test_bias_df = entire_test_bias_df[['raw_text', 'label']].copy()\n    identity_terms_synthetic = [\n        line.strip() for line in pkg_resources.resource_stream(\n            'unintended_ml_bias', 'bias_madlibs_data/adjectives_people.txt')\n    ]\n    model_bias_analysis.add_subgroup_columns_from_text(\n        entire_test_bias_df, 'raw_text', identity_terms_synthetic)\n\n    # Add preprocessing\n    entire_test_bias_df['text'] = list(\n      
  map(tokenizer, entire_test_bias_df['raw_text']))\n    if max_n_examples:\n      res = entire_test_bias_df.sample(n=max_n_examples, random_state=2018)\n    else:\n      res = entire_test_bias_df\n    res = res.copy(deep=True)\n    res = res.rename(columns={'raw_text': model_input_comment_field})\n    return res\n\n  return input_fn_bias"
  },
  {
    "path": "model_evaluation/jigsaw_evaluation_pipeline.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"-YibCLoSLRHp\"\n   },\n   \"source\": [\n    \"Copyright 2018 Google LLC.\\n\",\n    \"\\n\",\n    \"Licensed under the Apache License, Version 2.0 (the \\\"License\\\");\\n\",\n    \"you may not use this file except in compliance with the License.\\n\",\n    \"You may obtain a copy of the License at\\n\",\n    \"\\n\",\n    \"https://www.apache.org/licenses/LICENSE-2.0\\n\",\n    \"\\n\",\n    \"Unless required by applicable law or agreed to in writing, software\\n\",\n    \"distributed under the License is distributed on an \\\"AS IS\\\" BASIS,\\n\",\n    \"WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\\n\",\n    \"See the License for the specific language governing permissions and\\n\",\n    \"limitations under the License.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"LMykUGMauh9b\"\n   },\n   \"source\": [\n    \"# Evaluation code\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"__Disclaimer__\\n\",\n    \"*   This notebook contains experimental code, which may be changed without notice.\\n\",\n    \"*   The ideas here are some ideas relevant to fairness - they are not the whole story!\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Notebook summary\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"This notebook intends to evaluate a list of models on two dimensions:\\n\",\n    \"- \\\"Performance\\\": How well the model perform to classify the data (intended bias). Currently, we use the AUC.\\n\",\n    \"- \\\"Bias\\\": How much bias does the model contain (unintended bias). 
Currently, we use the pinned auc.\\n\",\n    \"\\n\",\n    \"This script takes the following steps:\\n\",\n    \"\\n\",\n    \"- Defines the models to evaluate and specify their signature (expected inputs/outputs).\\n\",\n    \"- Write input function to generate 2 datasets:\\n\",\n    \"    - A \\\"performance dataset\\\" which will be used for the first set of metrics. This dataset is supposed to be similar format to the training data (contain a piece of text and a label).\\n\",\n    \"    - A \\\"bias dataset\\\" which will be used for the second set of metrics. This data contains a piece of text, a label but also some subgroup information to evaluate the unintended bias on.\\n\",\n    \"- Runs predictions with the export_utils.\\n\",\n    \"- Evaluate metrics.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%load_ext autoreload\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%autoreload 2\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from __future__ import absolute_import\\n\",\n    \"from __future__ import division\\n\",\n    \"from __future__ import print_function\\n\",\n    \"\\n\",\n    \"import getpass\\n\",\n    \"from IPython.display import display\\n\",\n    \"import json\\n\",\n    \"import nltk\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas as pd\\n\",\n    \"import pkg_resources\\n\",\n    \"import os\\n\",\n    \"import random\\n\",\n    \"import re\\n\",\n    \"import seaborn as sns\\n\",\n    \"\\n\",\n    \"import tensorflow as tf\\n\",\n    \"from tensorflow.python.lib.io import file_io\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"#from google.colab 
import auth\\n\",\n    \"#auth.authenticate_user()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"#!pip install -U -q git+https://github.com/conversationai/unintended-ml-bias-analysis\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from unintended_ml_bias import model_bias_analysis\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"metadata\": {\n    \"scrolled\": true\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"import input_fn_example\\n\",\n    \"from utils_export.dataset import Dataset, Model\\n\",\n    \"from utils_export import utils_cloudml\\n\",\n    \"from utils_export import utils_tfrecords\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0' #Faster to access GCS file + https://github.com/tensorflow/tensorflow/issues/15530\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"[nltk_data] Downloading package punkt to /Users/nthain/nltk_data...\\n\",\n      \"[nltk_data]   Package punkt is already up-to-date!\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"True\"\n      ]\n     },\n     \"execution_count\": 9,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"nltk.download('punkt')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Settings\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Global variables\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": 10,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# User inputs\\n\",\n    \"PROJECT_NAME = 'conversationai-models'\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Part 1: Defining your model\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"An important user input is the description of the deployed models that are evaluated.\\n\",\n    \"\\n\",\n    \"1- Defining which model will be used.\\n\",\n    \"$MODEL_NAMES defined the different names (format: \\\"model_name:version\\\").\\n\",\n    \"\\n\",\n    \"2- Defining the model signature.\\n\",\n    \"Currently, the `Dataset` API does not detect the signature of a CMLE model, so this information is given by a `Model` instance.\\n\",\n    \"You need to describe:\\n\",\n    \"- input_spec: what the input_file should be (argument `feature_keys_spec`). It is a dictionary which describes the name of the fields and their types.\\n\",\n    \"- prediction_keys (argument `prediction_keys`). It is the name of the prediction field in the model output.\\n\",\n    \"- Name of the example key (argument `example_key`). A unique identifier for each sentence which will be generated by the dataset API (a.k.a. your input data does not need to have this field).\\n\",\n    \"    - When using Cloud MLE for batch predictions, data is processed in an unpredictable order. 
To be able to match the returned predictions with your input instances, you must have instance keys defined.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 11,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# User inputs:\\n\",\n    \"MODEL_NAMES = [\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738', # ??\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748', # ??\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820', # ??\\n\",\n    \"    'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828', # ??\\n\",\n    \"]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 12,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# User inputs: Model description (see above for more info).\\n\",\n    \"TEXT_FEATURE_NAME = 'tokens' #Input defined in serving function called in run.py (arg: `text_feature_name`).\\n\",\n    \"SENTENCE_KEY = 'comment_key' #Input key defined in serving function called in run.py (arg: `example_key_name`).\\n\",\n    \"#LABEL_NAME_PREDICTION_MODEL = 'scores' # Output prediction: typically $label_name/logistic\\n\",\n    \"LABEL_NAME_PREDICTION_MODEL = 'probabilities' # Output prediction: typically $label_name/logistic\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 13,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model_input_spec = {\\n\",\n    \"    TEXT_FEATURE_NAME: utils_tfrecords.EncodingFeatureSpec.LIST_STRING} #library will use this automatically\\n\",\n    \"\\n\",\n    \"model = Model(\\n\",\n    \"    feature_keys_spec=model_input_spec,\\n\",\n    \"    prediction_keys=LABEL_NAME_PREDICTION_MODEL,\\n\",\n    \"    example_key=SENTENCE_KEY,\\n\",\n    \"    model_names=MODEL_NAMES,\\n\",\n    \"    project_name=PROJECT_NAME)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   
\"metadata\": {},\n   \"source\": [\n    \"# Part 2: Defining the input_fn\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 14,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def tokenizer(text, lowercase=True):\\n\",\n    \"  \\\"\\\"\\\"Converts text to a list of words.\\n\",\n    \"\\n\",\n    \"  Args:\\n\",\n    \"    text: piece of text to tokenize (string).\\n\",\n    \"    lowercase: whether to include lowercasing in preprocessing (boolean).\\n\",\n    \"    tokenizer: Python function to tokenize the text on.\\n\",\n    \"\\n\",\n    \"  Returns:\\n\",\n    \"    A list of strings (words).\\n\",\n    \"  \\\"\\\"\\\"\\n\",\n    \"  words = nltk.word_tokenize(text.decode('utf-8'))\\n\",\n    \"  if lowercase:\\n\",\n    \"    words = [w.lower() for w in words]\\n\",\n    \"  return words\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Defining input_fn\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"We need to define first some input_fn which will be fed to the `Dataset` API.\\n\",\n    \"An input_fn must follow the following requirements:\\n\",\n    \"- Returns a pandas DataFrame\\n\",\n    \"- Have an argument 'max_n_examples' to control the size of the dataframe.\\n\",\n    \"- Containing at least a field $TEXT_FEATURE_NAME, which maps to a tokenized text (list of words) AND  a field 'label' which is 1 for toxic (0 otherwise).\\n\",\n    \"\\n\",\n    \"We will define two different input_fn (1 for performance, 1 for bias). The bias input_fn should also contain identity information.\\n\",\n    \"\\n\",\n    \"Note: You can use ANY input_fn that matches those requirements. 
You can find a few examples of input_fn in the file input_fn_example.py (for toxicity and civil_comments dataset).\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 15,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# User inputs: Choose which one you want to use OR create your own!\\n\",\n    \"INPUT_FN_PERFORMANCE = input_fn_example.create_input_fn_biasbios(\\n\",\n    \"    tokenizer,\\n\",\n    \"    model_input_comment_field=TEXT_FEATURE_NAME,\\n\",\n    \"    )\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Part 3: Running prediction\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Performance dataset\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 16,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# User inputs\\n\",\n    \"SIZE_PERFORMANCE_DATA_SET = 10000\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 17,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"gs://conversationai-models/nthain/tfrecords/performance_dataset_dir\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"# Pattern for path of tf_records\\n\",\n    \"PERFORMANCE_DATASET_DIR = os.path.join(\\n\",\n    \"    'gs://conversationai-models/',\\n\",\n    \"    getpass.getuser(),\\n\",\n    \"    'tfrecords',\\n\",\n    \"    'performance_dataset_dir')\\n\",\n    \"print(PERFORMANCE_DATASET_DIR)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 18,\n   \"metadata\": {\n    \"scrolled\": true\n   },\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"INFO:tensorflow:input_fn is compatible with the `Dataset` class.\\n\"\n     ]\n    },\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n   
  \"text\": [\n      \"/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/tensorflow/python/client/session.py:1711: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).\\n\",\n      \"  warnings.warn('An interactive session is already active. This can '\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"dataset_performance = Dataset(INPUT_FN_PERFORMANCE, PERFORMANCE_DATASET_DIR)\\n\",\n    \"random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future\\n\",\n    \"dataset_performance.load_data(SIZE_PERFORMANCE_DATA_SET, random_filter_keep_rate=0.5)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 19,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>tokens</th>\\n\",\n       \"      <th>gender</th>\\n\",\n       \"      <th>label</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>[in, her, role, ,, she, is, a, member, of, an,...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      
<td>17</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>[his, blog, www.donaldhtaylorjr.blogspot.com, ...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>[he, has, primarily, reported, for, the, atlan...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>12</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>[andrea, 's, area, of, expertise, is, in, whol...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>[dr., milane, was, trained, as, a, national, c...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>[he, is, also, visiting, associate, professor,...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>[her, research, focuses, on, the, trafficking,...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>[he, has, been, licensed, to, practice, law, i...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>[after, a, two-year, postdoctoral, fellowship,...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>[prior, to, teaching, 
,, she, was, an, account...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>31</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>[jackie, 's, works, are, published, in, academ...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>[her, research, topic, was, the, investigation...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>12</th>\\n\",\n       \"      <td>[she, graduated, with, honors, in, 2012, ., ha...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>17</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>13</th>\\n\",\n       \"      <td>[his, research, focuses, on, the, japan, air, ...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>14</th>\\n\",\n       \"      <td>[she, directed, the, 2014, peabody, award-winn...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>15</th>\\n\",\n       \"      <td>[he, lends, his, exceptional, surgical, skills...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>16</th>\\n\",\n       \"      <td>[he, teaches, courses, ranging, from, core, un...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>17</th>\\n\",\n       \"      <td>[her, major, fields, of, interest, are, develo...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n   
    \"    <tr>\\n\",\n       \"      <th>18</th>\\n\",\n       \"      <td>[dr., cole, honors, several, insurance, carrie...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>23</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>19</th>\\n\",\n       \"      <td>[she, practices, in, the, areas, of, business,...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>20</th>\\n\",\n       \"      <td>[she, has, obtained, her, phd, in, eu, law, fr...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>21</th>\\n\",\n       \"      <td>[his, photographs, are, reminiscent, of, silho...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>22</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>22</th>\\n\",\n       \"      <td>[he, earned, his, ph.d., at, the, university, ...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>23</th>\\n\",\n       \"      <td>[his, inter-, disciplinary, research, interest...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>24</th>\\n\",\n       \"      <td>[she, earned, her, ph.d., in, communication, s...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>25</th>\\n\",\n       \"      <td>[his, current, projects, examine, intergenerat...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>26</th>\\n\",\n       \"      <td>[he, has, served, as, an, expert, witness, 
in,...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>27</th>\\n\",\n       \"      <td>[she, 's, called, in, some, of, the, parent, o...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>31</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>28</th>\\n\",\n       \"      <td>[nneka, has, recently, become, interested, in,...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>29</th>\\n\",\n       \"      <td>[she, writes, regularly, for, faith, and, lead...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>...</th>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9970</th>\\n\",\n       \"      <td>[he, was, previously, an, assistant, professor...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9971</th>\\n\",\n       \"      <td>[aside, from, filmmaking, ,, he, ’, s, an, avi...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9972</th>\\n\",\n       \"      <td>[he, lives, in, dallas, with, his, wife, and, ...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>29</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9973</th>\\n\",\n       \"      <td>[he, exhibited, in, institutions, like, kultur...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>22</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>9974</th>\\n\",\n       \"      <td>[he, has, represented, numerous, municipalitie...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9975</th>\\n\",\n       \"      <td>[his, works, include, portrait, ,, glamour, an...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>22</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9976</th>\\n\",\n       \"      <td>[he, began, using, haskell, during, his, senio...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>29</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9977</th>\\n\",\n       \"      <td>[he, has, been, involved, with, streaming, med...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9978</th>\\n\",\n       \"      <td>[he, has, also, produced, lecture, courses, fo...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9979</th>\\n\",\n       \"      <td>[after, completing, her, degrees, at, the, uni...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>23</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9980</th>\\n\",\n       \"      <td>[this, is, a, slightly, edited, version, of, h...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>12</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9981</th>\\n\",\n       \"      <td>[she, received, her, b.sc, ., in, nutrition, f...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>8</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9982</th>\\n\",\n       \"      <td>[she, is, the, author, of, pelo, bueno, y, otr...</td>\\n\",\n       \"      
<td>F</td>\\n\",\n       \"      <td>24</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9983</th>\\n\",\n       \"      <td>[she, obtained, her, bachelor, of, science, de...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>23</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9984</th>\\n\",\n       \"      <td>[dr., kanchan, singh, practices, at, singh, de...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9985</th>\\n\",\n       \"      <td>[prior, to, joining, fresh, 'n, fit, cuisine, ...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>8</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9986</th>\\n\",\n       \"      <td>[he, worked, on, staff, at, aopa, pilot, magaz...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>12</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9987</th>\\n\",\n       \"      <td>[he, started, working, on, these, themes, duri...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>18</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9988</th>\\n\",\n       \"      <td>[his, research, aims, to, understand, the, con...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9989</th>\\n\",\n       \"      <td>[he, received, the, ph.d., degree, in, measuri...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9990</th>\\n\",\n       \"      <td>[he, currently, practices, at, johns, hopkins,...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>9991</th>\\n\",\n       \"      <td>[she, received, her, m.a, ., in, secondary, ed...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>31</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9992</th>\\n\",\n       \"      <td>[his, research, interests, lie, in, the, study...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9993</th>\\n\",\n       \"      <td>[she, graduated, with, honors, in, 2000, ., ha...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>26</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9994</th>\\n\",\n       \"      <td>[chris, primarily, teaches, anatomy, and, phys...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>31</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9995</th>\\n\",\n       \"      <td>[always, responsive, to, the, specific, geogra...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9996</th>\\n\",\n       \"      <td>[he, has, worked, on, numerous, projects, that...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>29</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9997</th>\\n\",\n       \"      <td>[he, graduated, from, the, academy, of, visual...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>22</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9998</th>\\n\",\n       \"      <td>[most, of, his, writing, is, from, the, middle...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>12</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9999</th>\\n\",\n       \"      <td>[he, is, currently, on, the, good, news, poetr...</td>\\n\",\n       \"      
<td>M</td>\\n\",\n       \"      <td>24</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"<p>10000 rows × 3 columns</p>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"                                                 tokens gender  label\\n\",\n       \"0     [in, her, role, ,, she, is, a, member, of, an,...      F     17\\n\",\n       \"1     [his, blog, www.donaldhtaylorjr.blogspot.com, ...      M     25\\n\",\n       \"2     [he, has, primarily, reported, for, the, atlan...      M     12\\n\",\n       \"3     [andrea, 's, area, of, expertise, is, in, whol...      F     25\\n\",\n       \"4     [dr., milane, was, trained, as, a, national, c...      F     25\\n\",\n       \"5     [he, is, also, visiting, associate, professor,...      M     25\\n\",\n       \"6     [her, research, focuses, on, the, trafficking,...      F     25\\n\",\n       \"7     [he, has, been, licensed, to, practice, law, i...      M      3\\n\",\n       \"8     [after, a, two-year, postdoctoral, fellowship,...      M     25\\n\",\n       \"9     [prior, to, teaching, ,, she, was, an, account...      F     31\\n\",\n       \"10    [jackie, 's, works, are, published, in, academ...      F     25\\n\",\n       \"11    [her, research, topic, was, the, investigation...      F     25\\n\",\n       \"12    [she, graduated, with, honors, in, 2012, ., ha...      F     17\\n\",\n       \"13    [his, research, focuses, on, the, japan, air, ...      M     25\\n\",\n       \"14    [she, directed, the, 2014, peabody, award-winn...      F     10\\n\",\n       \"15    [he, lends, his, exceptional, surgical, skills...      M     30\\n\",\n       \"16    [he, teaches, courses, ranging, from, core, un...      M     25\\n\",\n       \"17    [her, major, fields, of, interest, are, develo...      F     25\\n\",\n       \"18    [dr., cole, honors, several, insurance, carrie...      
M     23\\n\",\n       \"19    [she, practices, in, the, areas, of, business,...      F      3\\n\",\n       \"20    [she, has, obtained, her, phd, in, eu, law, fr...      F     25\\n\",\n       \"21    [his, photographs, are, reminiscent, of, silho...      M     22\\n\",\n       \"22    [he, earned, his, ph.d., at, the, university, ...      M     25\\n\",\n       \"23    [his, inter-, disciplinary, research, interest...      M     25\\n\",\n       \"24    [she, earned, her, ph.d., in, communication, s...      F     25\\n\",\n       \"25    [his, current, projects, examine, intergenerat...      M     25\\n\",\n       \"26    [he, has, served, as, an, expert, witness, in,...      M      0\\n\",\n       \"27    [she, 's, called, in, some, of, the, parent, o...      F     31\\n\",\n       \"28    [nneka, has, recently, become, interested, in,...      F      3\\n\",\n       \"29    [she, writes, regularly, for, faith, and, lead...      F     20\\n\",\n       \"...                                                 ...    ...    ...\\n\",\n       \"9970  [he, was, previously, an, assistant, professor...      M     25\\n\",\n       \"9971  [aside, from, filmmaking, ,, he, ’, s, an, avi...      M     10\\n\",\n       \"9972  [he, lives, in, dallas, with, his, wife, and, ...      M     29\\n\",\n       \"9973  [he, exhibited, in, institutions, like, kultur...      M     22\\n\",\n       \"9974  [he, has, represented, numerous, municipalitie...      M      3\\n\",\n       \"9975  [his, works, include, portrait, ,, glamour, an...      M     22\\n\",\n       \"9976  [he, began, using, haskell, during, his, senio...      M     29\\n\",\n       \"9977  [he, has, been, involved, with, streaming, med...      M      2\\n\",\n       \"9978  [he, has, also, produced, lecture, courses, fo...      M     25\\n\",\n       \"9979  [after, completing, her, degrees, at, the, uni...      F     23\\n\",\n       \"9980  [this, is, a, slightly, edited, version, of, h...      
F     12\\n\",\n       \"9981  [she, received, her, b.sc, ., in, nutrition, f...      F      8\\n\",\n       \"9982  [she, is, the, author, of, pelo, bueno, y, otr...      F     24\\n\",\n       \"9983  [she, obtained, her, bachelor, of, science, de...      F     23\\n\",\n       \"9984  [dr., kanchan, singh, practices, at, singh, de...      M     30\\n\",\n       \"9985  [prior, to, joining, fresh, 'n, fit, cuisine, ...      F      8\\n\",\n       \"9986  [he, worked, on, staff, at, aopa, pilot, magaz...      M     12\\n\",\n       \"9987  [he, started, working, on, these, themes, duri...      M     18\\n\",\n       \"9988  [his, research, aims, to, understand, the, con...      M     25\\n\",\n       \"9989  [he, received, the, ph.d., degree, in, measuri...      M     25\\n\",\n       \"9990  [he, currently, practices, at, johns, hopkins,...      M     30\\n\",\n       \"9991  [she, received, her, m.a, ., in, secondary, ed...      F     31\\n\",\n       \"9992  [his, research, interests, lie, in, the, study...      M     25\\n\",\n       \"9993  [she, graduated, with, honors, in, 2000, ., ha...      F     26\\n\",\n       \"9994  [chris, primarily, teaches, anatomy, and, phys...      M     31\\n\",\n       \"9995  [always, responsive, to, the, specific, geogra...      F      2\\n\",\n       \"9996  [he, has, worked, on, numerous, projects, that...      M     29\\n\",\n       \"9997  [he, graduated, from, the, academy, of, visual...      M     22\\n\",\n       \"9998  [most, of, his, writing, is, from, the, middle...      M     12\\n\",\n       \"9999  [he, is, currently, on, the, good, news, poetr...      
M     24\\n\",\n       \"\\n\",\n       \"[10000 rows x 3 columns]\"\n      ]\n     },\n     \"execution_count\": 19,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"dataset_performance.show_data()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 20,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"(10000, 3)\"\n      ]\n     },\n     \"execution_count\": 20,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"dataset_performance.show_data().shape\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 21,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"Index([u'tokens', u'gender', u'label'], dtype='object')\"\n      ]\n     },\n     \"execution_count\": 21,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"dataset_performance.show_data().columns\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 22,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"CLASS_NAMES = range(33)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 23,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"features {\\n\",\n      \"  feature {\\n\",\n      \"    key: \\\"comment_text\\\"\\n\",\n      \"    value {\\n\",\n      \"      bytes_list {\\n\",\n      \"        value: \\\" In her role, she is a member of an innovative team-based care model which has been recognized by Wall Street Journal and the Robert Wood Johnson Foundation. A process improvement leader with a passion for serving vulnerable populations, Amberly was recognized by her colleagues with the first Daisy Award for Extraordinary Nurses at Cambridge Health Alliance. 
Amberly holds a BS in Nursing from Valparaiso University and a Masters in Public Health from the University of Massachusetts Amherst. read more\\\"\\n\",\n      \"      }\\n\",\n      \"    }\\n\",\n      \"  }\\n\",\n      \"  feature {\\n\",\n      \"    key: \\\"gender\\\"\\n\",\n      \"    value {\\n\",\n      \"      bytes_list {\\n\",\n      \"        value: \\\"F\\\"\\n\",\n      \"      }\\n\",\n      \"    }\\n\",\n      \"  }\\n\",\n      \"  feature {\\n\",\n      \"    key: \\\"title\\\"\\n\",\n      \"    value {\\n\",\n      \"      int64_list {\\n\",\n      \"        value: 17\\n\",\n      \"      }\\n\",\n      \"    }\\n\",\n      \"  }\\n\",\n      \"}\\n\",\n      \"\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"INPUT_DATA = 'gs://conversationai-models/biosbias/dataflow_dir/data-preparation-20190220165938/eval-00000-of-00003.tfrecord'\\n\",\n    \"record_iterator = tf.python_io.tf_record_iterator(path=INPUT_DATA)\\n\",\n    \"string_record = next(record_iterator)\\n\",\n    \"example = tf.train.Example()\\n\",\n    \"example.ParseFromString(string_record)\\n\",\n    \"text = example.features.feature\\n\",\n    \"print(example)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 24,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"INFO:tensorflow:Model is compatible with the `Dataset` instance.\\n\",\n      \"WARNING:tensorflow:Using past predictions. 
the data must match exactly (same number of lines and same order).\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"# Set recompute_predictions=False to save time if predictions are available.\\n\",\n    \"dataset_performance.add_model_prediction_to_data(model, recompute_predictions=False, class_names=CLASS_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 25,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"gs://conversationai-models/nthain/tfrecords/performance_dataset_dir/prediction_data_tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738\\n\",\n      \"33\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"def _load_predictions(pred_file):\\n\",\n    \"    with file_io.FileIO(pred_file, 'r') as f:\\n\",\n    \"      # prediction file needs to fit in memory.\\n\",\n    \"      try:\\n\",\n    \"        predictions = [json.loads(line) for line in f]\\n\",\n    \"      except:\\n\",\n    \"        predictions = []\\n\",\n    \"    return predictions\\n\",\n    \"\\n\",\n    \"model_name_tmp = MODEL_NAMES[0]\\n\",\n    \"prediction_file = dataset_performance.get_path_prediction(model_name_tmp)\\n\",\n    \"print(prediction_file)\\n\",\n    \"prediction_file = os.path.join(prediction_file,\\n\",\n    \"                                 'prediction.results-00000-of-00001')\\n\",\n    \"print(len(_load_predictions(prediction_file)[0]['probabilities']))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": true\n   },\n   \"outputs\": [],\n   \"source\": []\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Post processing\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 26,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"test_performance_df = dataset_performance.show_data()\"\n   
]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 27,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"test_bias_df = test_performance_df.copy()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Analyzing final results\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 28,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"base_uri\": \"https://localhost:8080/\",\n     \"height\": 204\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 17,\n     \"status\": \"ok\",\n     \"timestamp\": 1530641283264,\n     \"user\": {\n      \"displayName\": \"Flavien Prost\",\n      \"photoUrl\": \"//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg\",\n      \"userId\": \"100080410554240838905\"\n     },\n     \"user_tz\": 240\n    },\n    \"id\": \"Y7R4heIB5GaV\",\n    \"outputId\": \"e8e0c3bc-96d8-4635-865a-275052054df8\"\n   },\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>tokens</th>\\n\",\n       \"      <th>gender</th>\\n\",\n       \"      <th>label</th>\\n\",\n       \"      
<th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6</th>\\n\",\n       \"      <th>...</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>[in, her, role, ,, she, is, 
a, member, of, an,...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>17</td>\\n\",\n       \"      <td>0.001687</td>\\n\",\n       \"      <td>1.814099e-11</td>\\n\",\n       \"      <td>0.002681</td>\\n\",\n       \"      <td>0.009853</td>\\n\",\n       \"      <td>0.004227</td>\\n\",\n       \"      <td>0.055716</td>\\n\",\n       \"      <td>0.003005</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.003351</td>\\n\",\n       \"      <td>0.013561</td>\\n\",\n       \"      <td>0.002040</td>\\n\",\n       \"      <td>0.001682</td>\\n\",\n       \"      <td>4.412969e-04</td>\\n\",\n       \"      <td>6.086852e-17</td>\\n\",\n       \"      <td>0.001606</td>\\n\",\n       \"      <td>0.001379</td>\\n\",\n       \"      <td>0.014635</td>\\n\",\n       \"      <td>0.000032</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>[his, blog, www.donaldhtaylorjr.blogspot.com, ...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.014774</td>\\n\",\n       \"      <td>2.716771e-13</td>\\n\",\n       \"      <td>0.005496</td>\\n\",\n       \"      <td>0.022347</td>\\n\",\n       \"      <td>0.003845</td>\\n\",\n       \"      <td>0.084480</td>\\n\",\n       \"      <td>0.000096</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.010309</td>\\n\",\n       \"      <td>0.001055</td>\\n\",\n       \"      <td>0.001062</td>\\n\",\n       \"      <td>0.006205</td>\\n\",\n       \"      <td>9.439933e-07</td>\\n\",\n       \"      <td>5.250679e-18</td>\\n\",\n       \"      <td>0.001204</td>\\n\",\n       \"      <td>0.000150</td>\\n\",\n       \"      <td>0.015252</td>\\n\",\n       \"      <td>0.000779</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>[he, has, primarily, reported, for, the, atlan...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      
<td>12</td>\\n\",\n       \"      <td>0.016779</td>\\n\",\n       \"      <td>8.870694e-16</td>\\n\",\n       \"      <td>0.001688</td>\\n\",\n       \"      <td>0.071343</td>\\n\",\n       \"      <td>0.000560</td>\\n\",\n       \"      <td>0.029823</td>\\n\",\n       \"      <td>0.000032</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.018767</td>\\n\",\n       \"      <td>0.022292</td>\\n\",\n       \"      <td>0.077598</td>\\n\",\n       \"      <td>0.033979</td>\\n\",\n       \"      <td>8.196229e-05</td>\\n\",\n       \"      <td>3.315851e-11</td>\\n\",\n       \"      <td>0.007313</td>\\n\",\n       \"      <td>0.002565</td>\\n\",\n       \"      <td>0.118167</td>\\n\",\n       \"      <td>0.001603</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>[andrea, 's, area, of, expertise, is, in, whol...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.017742</td>\\n\",\n       \"      <td>1.019689e-15</td>\\n\",\n       \"      <td>0.017150</td>\\n\",\n       \"      <td>0.052085</td>\\n\",\n       \"      <td>0.002097</td>\\n\",\n       \"      <td>0.052322</td>\\n\",\n       \"      <td>0.002627</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.001580</td>\\n\",\n       \"      <td>0.145462</td>\\n\",\n       \"      <td>0.000637</td>\\n\",\n       \"      <td>0.000337</td>\\n\",\n       \"      <td>3.909138e-04</td>\\n\",\n       \"      <td>1.304484e-21</td>\\n\",\n       \"      <td>0.011515</td>\\n\",\n       \"      <td>0.000922</td>\\n\",\n       \"      <td>0.029867</td>\\n\",\n       \"      <td>0.000001</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>[dr., milane, was, trained, as, a, national, c...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.015531</td>\\n\",\n       \"      
<td>1.783027e-12</td>\\n\",\n       \"      <td>0.196227</td>\\n\",\n       \"      <td>0.016471</td>\\n\",\n       \"      <td>0.002690</td>\\n\",\n       \"      <td>0.000040</td>\\n\",\n       \"      <td>0.001384</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.013445</td>\\n\",\n       \"      <td>0.003754</td>\\n\",\n       \"      <td>0.220090</td>\\n\",\n       \"      <td>0.081232</td>\\n\",\n       \"      <td>7.920414e-05</td>\\n\",\n       \"      <td>2.406181e-13</td>\\n\",\n       \"      <td>0.150817</td>\\n\",\n       \"      <td>0.014913</td>\\n\",\n       \"      <td>0.071632</td>\\n\",\n       \"      <td>0.000142</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"<p>5 rows × 135 columns</p>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"                                              tokens gender  label  \\\\\\n\",\n       \"0  [in, her, role, ,, she, is, a, member, of, an,...      F     17   \\n\",\n       \"1  [his, blog, www.donaldhtaylorjr.blogspot.com, ...      M     25   \\n\",\n       \"2  [he, has, primarily, reported, for, the, atlan...      M     12   \\n\",\n       \"3  [andrea, 's, area, of, expertise, is, in, whol...      F     25   \\n\",\n       \"4  [dr., milane, was, trained, as, a, national, c...      
F     25   \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0  \\\\\\n\",\n       \"0                                           0.001687                           \\n\",\n       \"1                                           0.014774                           \\n\",\n       \"2                                           0.016779                           \\n\",\n       \"3                                           0.017742                           \\n\",\n       \"4                                           0.015531                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1  \\\\\\n\",\n       \"0                                       1.814099e-11                           \\n\",\n       \"1                                       2.716771e-13                           \\n\",\n       \"2                                       8.870694e-16                           \\n\",\n       \"3                                       1.019689e-15                           \\n\",\n       \"4                                       1.783027e-12                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2  \\\\\\n\",\n       \"0                                           0.002681                           \\n\",\n       \"1                                           0.005496                           \\n\",\n       \"2                                           0.001688                           \\n\",\n       \"3                                           0.017150                           \\n\",\n       \"4                                           0.196227                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3  \\\\\\n\",\n       \"0                                           0.009853                
           \\n\",\n       \"1                                           0.022347                           \\n\",\n       \"2                                           0.071343                           \\n\",\n       \"3                                           0.052085                           \\n\",\n       \"4                                           0.016471                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4  \\\\\\n\",\n       \"0                                           0.004227                           \\n\",\n       \"1                                           0.003845                           \\n\",\n       \"2                                           0.000560                           \\n\",\n       \"3                                           0.002097                           \\n\",\n       \"4                                           0.002690                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5  \\\\\\n\",\n       \"0                                           0.055716                           \\n\",\n       \"1                                           0.084480                           \\n\",\n       \"2                                           0.029823                           \\n\",\n       \"3                                           0.052322                           \\n\",\n       \"4                                           0.000040                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6  \\\\\\n\",\n       \"0                                           0.003005                           \\n\",\n       \"1                                           0.000096                           \\n\",\n       \"2                                           0.000032                           \\n\",\n 
      \"3                                           0.002627                           \\n\",\n       \"4                                           0.001384                           \\n\",\n       \"\\n\",\n       \"                                      ...                                      \\\\\\n\",\n       \"0                                     ...                                       \\n\",\n       \"1                                     ...                                       \\n\",\n       \"2                                     ...                                       \\n\",\n       \"3                                     ...                                       \\n\",\n       \"4                                     ...                                       \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23  \\\\\\n\",\n       \"0                                           0.003351                            \\n\",\n       \"1                                           0.010309                            \\n\",\n       \"2                                           0.018767                            \\n\",\n       \"3                                           0.001580                            \\n\",\n       \"4                                           0.013445                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24  \\\\\\n\",\n       \"0                                           0.013561                            \\n\",\n       \"1                                           0.001055                            \\n\",\n       \"2                                           0.022292                            \\n\",\n       \"3                                           0.145462                            \\n\",\n       \"4                                           0.003754                            \\n\",\n   
    \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25  \\\\\\n\",\n       \"0                                           0.002040                            \\n\",\n       \"1                                           0.001062                            \\n\",\n       \"2                                           0.077598                            \\n\",\n       \"3                                           0.000637                            \\n\",\n       \"4                                           0.220090                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26  \\\\\\n\",\n       \"0                                           0.001682                            \\n\",\n       \"1                                           0.006205                            \\n\",\n       \"2                                           0.033979                            \\n\",\n       \"3                                           0.000337                            \\n\",\n       \"4                                           0.081232                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27  \\\\\\n\",\n       \"0                                       4.412969e-04                            \\n\",\n       \"1                                       9.439933e-07                            \\n\",\n       \"2                                       8.196229e-05                            \\n\",\n       \"3                                       3.909138e-04                            \\n\",\n       \"4                                       7.920414e-05                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28  \\\\\\n\",\n       \"0                                       6.086852e-17                   
         \\n\",\n       \"1                                       5.250679e-18                            \\n\",\n       \"2                                       3.315851e-11                            \\n\",\n       \"3                                       1.304484e-21                            \\n\",\n       \"4                                       2.406181e-13                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29  \\\\\\n\",\n       \"0                                           0.001606                            \\n\",\n       \"1                                           0.001204                            \\n\",\n       \"2                                           0.007313                            \\n\",\n       \"3                                           0.011515                            \\n\",\n       \"4                                           0.150817                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30  \\\\\\n\",\n       \"0                                           0.001379                            \\n\",\n       \"1                                           0.000150                            \\n\",\n       \"2                                           0.002565                            \\n\",\n       \"3                                           0.000922                            \\n\",\n       \"4                                           0.014913                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31  \\\\\\n\",\n       \"0                                           0.014635                            \\n\",\n       \"1                                           0.015252                            \\n\",\n       \"2                                           0.118167                   
         \\n\",\n       \"3                                           0.029867                            \\n\",\n       \"4                                           0.071632                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32  \\n\",\n       \"0                                           0.000032                           \\n\",\n       \"1                                           0.000779                           \\n\",\n       \"2                                           0.001603                           \\n\",\n       \"3                                           0.000001                           \\n\",\n       \"4                                           0.000142                           \\n\",\n       \"\\n\",\n       \"[5 rows x 135 columns]\"\n      ]\n     },\n     \"execution_count\": 28,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"test_performance_df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 29,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"base_uri\": \"https://localhost:8080/\",\n     \"height\": 233\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 41,\n     \"status\": \"ok\",\n     \"timestamp\": 1530641286091,\n     \"user\": {\n      \"displayName\": \"Flavien Prost\",\n      \"photoUrl\": \"//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg\",\n      \"userId\": \"100080410554240838905\"\n     },\n     \"user_tz\": 240\n    },\n    \"id\": \"Ln2BXOg4Q6GP\",\n    \"outputId\": \"bb5288e8-9f10-4796-b36e-42f5c02cb148\"\n   },\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type 
{\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>tokens</th>\\n\",\n       \"      <th>gender</th>\\n\",\n       \"      <th>label</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6</th>\\n\",\n       \"      <th>...</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27</th>\\n\",\n       \"      
<th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31</th>\\n\",\n       \"      <th>tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>[in, her, role, ,, she, is, a, member, of, an,...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>17</td>\\n\",\n       \"      <td>0.001687</td>\\n\",\n       \"      <td>1.814099e-11</td>\\n\",\n       \"      <td>0.002681</td>\\n\",\n       \"      <td>0.009853</td>\\n\",\n       \"      <td>0.004227</td>\\n\",\n       \"      <td>0.055716</td>\\n\",\n       \"      <td>0.003005</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.003351</td>\\n\",\n       \"      <td>0.013561</td>\\n\",\n       \"      <td>0.002040</td>\\n\",\n       \"      <td>0.001682</td>\\n\",\n       \"      <td>4.412969e-04</td>\\n\",\n       \"      <td>6.086852e-17</td>\\n\",\n       \"      <td>0.001606</td>\\n\",\n       \"      <td>0.001379</td>\\n\",\n       \"      <td>0.014635</td>\\n\",\n       \"      <td>0.000032</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>[his, blog, www.donaldhtaylorjr.blogspot.com, ...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.014774</td>\\n\",\n       \"      <td>2.716771e-13</td>\\n\",\n       \"      <td>0.005496</td>\\n\",\n       \"      <td>0.022347</td>\\n\",\n       \"      <td>0.003845</td>\\n\",\n       \"      
<td>0.084480</td>\\n\",\n       \"      <td>0.000096</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.010309</td>\\n\",\n       \"      <td>0.001055</td>\\n\",\n       \"      <td>0.001062</td>\\n\",\n       \"      <td>0.006205</td>\\n\",\n       \"      <td>9.439933e-07</td>\\n\",\n       \"      <td>5.250679e-18</td>\\n\",\n       \"      <td>0.001204</td>\\n\",\n       \"      <td>0.000150</td>\\n\",\n       \"      <td>0.015252</td>\\n\",\n       \"      <td>0.000779</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>[he, has, primarily, reported, for, the, atlan...</td>\\n\",\n       \"      <td>M</td>\\n\",\n       \"      <td>12</td>\\n\",\n       \"      <td>0.016779</td>\\n\",\n       \"      <td>8.870694e-16</td>\\n\",\n       \"      <td>0.001688</td>\\n\",\n       \"      <td>0.071343</td>\\n\",\n       \"      <td>0.000560</td>\\n\",\n       \"      <td>0.029823</td>\\n\",\n       \"      <td>0.000032</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.018767</td>\\n\",\n       \"      <td>0.022292</td>\\n\",\n       \"      <td>0.077598</td>\\n\",\n       \"      <td>0.033979</td>\\n\",\n       \"      <td>8.196229e-05</td>\\n\",\n       \"      <td>3.315851e-11</td>\\n\",\n       \"      <td>0.007313</td>\\n\",\n       \"      <td>0.002565</td>\\n\",\n       \"      <td>0.118167</td>\\n\",\n       \"      <td>0.001603</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>[andrea, 's, area, of, expertise, is, in, whol...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.017742</td>\\n\",\n       \"      <td>1.019689e-15</td>\\n\",\n       \"      <td>0.017150</td>\\n\",\n       \"      <td>0.052085</td>\\n\",\n       \"      <td>0.002097</td>\\n\",\n       \"      <td>0.052322</td>\\n\",\n       \"      <td>0.002627</td>\\n\",\n       \"      
<td>...</td>\\n\",\n       \"      <td>0.001580</td>\\n\",\n       \"      <td>0.145462</td>\\n\",\n       \"      <td>0.000637</td>\\n\",\n       \"      <td>0.000337</td>\\n\",\n       \"      <td>3.909138e-04</td>\\n\",\n       \"      <td>1.304484e-21</td>\\n\",\n       \"      <td>0.011515</td>\\n\",\n       \"      <td>0.000922</td>\\n\",\n       \"      <td>0.029867</td>\\n\",\n       \"      <td>0.000001</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>[dr., milane, was, trained, as, a, national, c...</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>0.015531</td>\\n\",\n       \"      <td>1.783027e-12</td>\\n\",\n       \"      <td>0.196227</td>\\n\",\n       \"      <td>0.016471</td>\\n\",\n       \"      <td>0.002690</td>\\n\",\n       \"      <td>0.000040</td>\\n\",\n       \"      <td>0.001384</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>0.013445</td>\\n\",\n       \"      <td>0.003754</td>\\n\",\n       \"      <td>0.220090</td>\\n\",\n       \"      <td>0.081232</td>\\n\",\n       \"      <td>7.920414e-05</td>\\n\",\n       \"      <td>2.406181e-13</td>\\n\",\n       \"      <td>0.150817</td>\\n\",\n       \"      <td>0.014913</td>\\n\",\n       \"      <td>0.071632</td>\\n\",\n       \"      <td>0.000142</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"<p>5 rows × 135 columns</p>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"                                              tokens gender  label  \\\\\\n\",\n       \"0  [in, her, role, ,, she, is, a, member, of, an,...      F     17   \\n\",\n       \"1  [his, blog, www.donaldhtaylorjr.blogspot.com, ...      M     25   \\n\",\n       \"2  [he, has, primarily, reported, for, the, atlan...      M     12   \\n\",\n       \"3  [andrea, 's, area, of, expertise, is, in, whol...      
F     25   \\n\",\n       \"4  [dr., milane, was, trained, as, a, national, c...      F     25   \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_0  \\\\\\n\",\n       \"0                                           0.001687                           \\n\",\n       \"1                                           0.014774                           \\n\",\n       \"2                                           0.016779                           \\n\",\n       \"3                                           0.017742                           \\n\",\n       \"4                                           0.015531                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_1  \\\\\\n\",\n       \"0                                       1.814099e-11                           \\n\",\n       \"1                                       2.716771e-13                           \\n\",\n       \"2                                       8.870694e-16                           \\n\",\n       \"3                                       1.019689e-15                           \\n\",\n       \"4                                       1.783027e-12                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_2  \\\\\\n\",\n       \"0                                           0.002681                           \\n\",\n       \"1                                           0.005496                           \\n\",\n       \"2                                           0.001688                           \\n\",\n       \"3                                           0.017150                           \\n\",\n       \"4                                           0.196227                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3  
\\\\\\n\",\n       \"0                                           0.009853                           \\n\",\n       \"1                                           0.022347                           \\n\",\n       \"2                                           0.071343                           \\n\",\n       \"3                                           0.052085                           \\n\",\n       \"4                                           0.016471                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_4  \\\\\\n\",\n       \"0                                           0.004227                           \\n\",\n       \"1                                           0.003845                           \\n\",\n       \"2                                           0.000560                           \\n\",\n       \"3                                           0.002097                           \\n\",\n       \"4                                           0.002690                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_5  \\\\\\n\",\n       \"0                                           0.055716                           \\n\",\n       \"1                                           0.084480                           \\n\",\n       \"2                                           0.029823                           \\n\",\n       \"3                                           0.052322                           \\n\",\n       \"4                                           0.000040                           \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_6  \\\\\\n\",\n       \"0                                           0.003005                           \\n\",\n       \"1                                           0.000096                           \\n\",\n       
\"2                                           0.000032                           \\n\",\n       \"3                                           0.002627                           \\n\",\n       \"4                                           0.001384                           \\n\",\n       \"\\n\",\n       \"                                      ...                                      \\\\\\n\",\n       \"0                                     ...                                       \\n\",\n       \"1                                     ...                                       \\n\",\n       \"2                                     ...                                       \\n\",\n       \"3                                     ...                                       \\n\",\n       \"4                                     ...                                       \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_23  \\\\\\n\",\n       \"0                                           0.003351                            \\n\",\n       \"1                                           0.010309                            \\n\",\n       \"2                                           0.018767                            \\n\",\n       \"3                                           0.001580                            \\n\",\n       \"4                                           0.013445                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_24  \\\\\\n\",\n       \"0                                           0.013561                            \\n\",\n       \"1                                           0.001055                            \\n\",\n       \"2                                           0.022292                            \\n\",\n       \"3                                           0.145462                            \\n\",\n       
\"4                                           0.003754                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_25  \\\\\\n\",\n       \"0                                           0.002040                            \\n\",\n       \"1                                           0.001062                            \\n\",\n       \"2                                           0.077598                            \\n\",\n       \"3                                           0.000637                            \\n\",\n       \"4                                           0.220090                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_26  \\\\\\n\",\n       \"0                                           0.001682                            \\n\",\n       \"1                                           0.006205                            \\n\",\n       \"2                                           0.033979                            \\n\",\n       \"3                                           0.000337                            \\n\",\n       \"4                                           0.081232                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_27  \\\\\\n\",\n       \"0                                       4.412969e-04                            \\n\",\n       \"1                                       9.439933e-07                            \\n\",\n       \"2                                       8.196229e-05                            \\n\",\n       \"3                                       3.909138e-04                            \\n\",\n       \"4                                       7.920414e-05                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_28 
 \\\\\\n\",\n       \"0                                       6.086852e-17                            \\n\",\n       \"1                                       5.250679e-18                            \\n\",\n       \"2                                       3.315851e-11                            \\n\",\n       \"3                                       1.304484e-21                            \\n\",\n       \"4                                       2.406181e-13                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_29  \\\\\\n\",\n       \"0                                           0.001606                            \\n\",\n       \"1                                           0.001204                            \\n\",\n       \"2                                           0.007313                            \\n\",\n       \"3                                           0.011515                            \\n\",\n       \"4                                           0.150817                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_30  \\\\\\n\",\n       \"0                                           0.001379                            \\n\",\n       \"1                                           0.000150                            \\n\",\n       \"2                                           0.002565                            \\n\",\n       \"3                                           0.000922                            \\n\",\n       \"4                                           0.014913                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_31  \\\\\\n\",\n       \"0                                           0.014635                            \\n\",\n       \"1                                           0.015252                       
     \\n\",\n       \"2                                           0.118167                            \\n\",\n       \"3                                           0.029867                            \\n\",\n       \"4                                           0.071632                            \\n\",\n       \"\\n\",\n       \"   tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828_32  \\n\",\n       \"0                                           0.000032                           \\n\",\n       \"1                                           0.000779                           \\n\",\n       \"2                                           0.001603                           \\n\",\n       \"3                                           0.000001                           \\n\",\n       \"4                                           0.000142                           \\n\",\n       \"\\n\",\n       \"[5 rows x 135 columns]\"\n      ]\n     },\n     \"execution_count\": 29,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"test_bias_df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"8m8QI4qEjtcY\"\n   },\n   \"source\": [\n    \"# Part 4: Run evaluation metrics\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"PhwSHsMtO9fF\"\n   },\n   \"source\": [\n    \"## Performance metrics\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Data Format\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"At this point, our performance data is in DataFrame test_performance_df, with columns:\\n\",\n    \"\\n\",\n    \"- label: Integer id of the true class for the example (not a boolean toxicity flag).\\n\",\n    \"- < model name >_< class >: One column per model and class, cells contain the score from that model for that class.\\n\",\n    \"\\n\",\n    \"You can run the 
analysis below on any data in this format. Subgroup labels can be generated via words in the text as done above, or come from human labels if you have them.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Run AUC\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 30,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     }\n    },\n    \"colab_type\": \"code\",\n    \"id\": \"XUZYCq-6N8MK\"\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"import sklearn.metrics as metrics\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 31,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"25    3295\\n\",\n       \"3      890\\n\",\n       \"22     661\\n\",\n       \"12     542\\n\",\n       \"26     507\\n\",\n       \"23     494\\n\",\n       \"17     481\\n\",\n       \"31     427\\n\",\n       \"30     343\\n\",\n       \"7      268\\n\",\n       \"2      265\\n\",\n       \"18     209\\n\",\n       \"16     202\\n\",\n       \"24     197\\n\",\n       \"29     194\\n\",\n       \"10     185\\n\",\n       \"6      156\\n\",\n       \"0      141\\n\",\n       \"8      102\\n\",\n       \"5       87\\n\",\n       \"20      67\\n\",\n       \"4       58\\n\",\n       \"32      50\\n\",\n       \"19      41\\n\",\n       \"9       39\\n\",\n       \"11      37\\n\",\n       \"27      32\\n\",\n       \"21      30\\n\",\n       \"Name: label, dtype: int64\"\n      ]\n     },\n     \"execution_count\": 31,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"test_performance_df.label.value_counts()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 32,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"0       False\\n\",\n       \"1       
False\\n\",\n       \"2       False\\n\",\n       \"3       False\\n\",\n       \"4       False\\n\",\n       \"5       False\\n\",\n       \"6       False\\n\",\n       \"7        True\\n\",\n       \"8       False\\n\",\n       \"9       False\\n\",\n       \"10      False\\n\",\n       \"11      False\\n\",\n       \"12      False\\n\",\n       \"13      False\\n\",\n       \"14      False\\n\",\n       \"15      False\\n\",\n       \"16      False\\n\",\n       \"17      False\\n\",\n       \"18      False\\n\",\n       \"19       True\\n\",\n       \"20      False\\n\",\n       \"21      False\\n\",\n       \"22      False\\n\",\n       \"23      False\\n\",\n       \"24      False\\n\",\n       \"25      False\\n\",\n       \"26      False\\n\",\n       \"27      False\\n\",\n       \"28       True\\n\",\n       \"29      False\\n\",\n       \"        ...  \\n\",\n       \"9970    False\\n\",\n       \"9971    False\\n\",\n       \"9972    False\\n\",\n       \"9973    False\\n\",\n       \"9974     True\\n\",\n       \"9975    False\\n\",\n       \"9976    False\\n\",\n       \"9977    False\\n\",\n       \"9978    False\\n\",\n       \"9979    False\\n\",\n       \"9980    False\\n\",\n       \"9981    False\\n\",\n       \"9982    False\\n\",\n       \"9983    False\\n\",\n       \"9984    False\\n\",\n       \"9985    False\\n\",\n       \"9986    False\\n\",\n       \"9987    False\\n\",\n       \"9988    False\\n\",\n       \"9989    False\\n\",\n       \"9990    False\\n\",\n       \"9991    False\\n\",\n       \"9992    False\\n\",\n       \"9993    False\\n\",\n       \"9994    False\\n\",\n       \"9995    False\\n\",\n       \"9996    False\\n\",\n       \"9997    False\\n\",\n       \"9998    False\\n\",\n       \"9999    False\\n\",\n       \"Name: label, Length: 10000, dtype: bool\"\n      ]\n     },\n     \"execution_count\": 32,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    
\"test_performance_df['label'] == 3\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 33,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"0       0.009853\\n\",\n       \"1       0.022347\\n\",\n       \"2       0.071343\\n\",\n       \"3       0.052085\\n\",\n       \"4       0.016471\\n\",\n       \"5       0.101164\\n\",\n       \"6       0.011855\\n\",\n       \"7       0.001939\\n\",\n       \"8       0.577954\\n\",\n       \"9       0.128116\\n\",\n       \"10      0.014246\\n\",\n       \"11      0.022629\\n\",\n       \"12      0.050127\\n\",\n       \"13      0.205395\\n\",\n       \"14      0.038603\\n\",\n       \"15      0.045960\\n\",\n       \"16      0.652514\\n\",\n       \"17      0.099024\\n\",\n       \"18      0.055800\\n\",\n       \"19      0.167238\\n\",\n       \"20      0.056128\\n\",\n       \"21      0.073346\\n\",\n       \"22      0.040896\\n\",\n       \"23      0.046719\\n\",\n       \"24      0.066602\\n\",\n       \"25      0.015700\\n\",\n       \"26      0.018788\\n\",\n       \"27      0.099245\\n\",\n       \"28      0.744404\\n\",\n       \"29      0.054567\\n\",\n       \"          ...   
\\n\",\n       \"9970    0.025056\\n\",\n       \"9971    0.032513\\n\",\n       \"9972    0.059166\\n\",\n       \"9973    0.030145\\n\",\n       \"9974    0.146219\\n\",\n       \"9975    0.132243\\n\",\n       \"9976    0.061952\\n\",\n       \"9977    0.497093\\n\",\n       \"9978    0.154263\\n\",\n       \"9979    0.033800\\n\",\n       \"9980    0.041427\\n\",\n       \"9981    0.000079\\n\",\n       \"9982    0.071002\\n\",\n       \"9983    0.961150\\n\",\n       \"9984    0.017224\\n\",\n       \"9985    0.113003\\n\",\n       \"9986    0.040686\\n\",\n       \"9987    0.729384\\n\",\n       \"9988    0.025192\\n\",\n       \"9989    0.066657\\n\",\n       \"9990    0.025502\\n\",\n       \"9991    0.011763\\n\",\n       \"9992    0.007214\\n\",\n       \"9993    0.004737\\n\",\n       \"9994    0.044174\\n\",\n       \"9995    0.125944\\n\",\n       \"9996    0.199613\\n\",\n       \"9997    0.018891\\n\",\n       \"9998    0.218019\\n\",\n       \"9999    0.052486\\n\",\n       \"Name: tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738_3, Length: 10000, dtype: float64\"\n      ]\n     },\n     \"execution_count\": 33,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"_model = 'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738'\\n\",\n    \"_class = 3\\n\",\n    \"test_performance_df['{}_{}'.format(_model, _class)]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 34,\n   \"metadata\": {\n    \"colab\": {\n     \"autoexec\": {\n      \"startup\": false,\n      \"wait_interval\": 0\n     },\n     \"base_uri\": \"https://localhost:8080/\",\n     \"height\": 35\n    },\n    \"colab_type\": \"code\",\n    \"executionInfo\": {\n     \"elapsed\": 32,\n     \"status\": \"ok\",\n     \"timestamp\": 1530641399913,\n     \"user\": {\n      \"displayName\": \"Flavien Prost\",\n      \"photoUrl\": 
\"//lh5.googleusercontent.com/-2GvWuP8dy24/AAAAAAAAAAI/AAAAAAAAAHI/aCatYKxJMXQ/s50-c-k-no/photo.jpg\",\n      \"userId\": \"100080410554240838905\"\n     },\n     \"user_tz\": 240\n    },\n    \"id\": \"yc8SWZbqMwA4\",\n    \"outputId\": \"6e9399b8-ce22-42bb-c318-959bae73f6c0\",\n    \"scrolled\": true\n   },\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.472880379306\\n\",\n      \"Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\\n\",\n      \"Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.494346987625\\n\",\n      \"Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.5094779166\\n\",\n      \"Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.579115768006\\n\",\n      \"Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.495869234756\\n\",\n      \"Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.468048349118\\n\",\n      \"Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.485770898896\\n\",\n      \"Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.491489665173\\n\",\n      \"Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.47350564638\\n\",\n      \"Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.488175572414\\n\",\n      \"Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.52613046651\\n\",\n      \"Auc for class 12 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.496119960142\\n\",\n      \"Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\\n\",\n      \"Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\\n\",\n      \"Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\\n\",\n      \"Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.520060671101\\n\",\n      \"Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.502598042781\\n\",\n      \"Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.471809136308\\n\",\n      \"Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.589720292223\\n\",\n      \"Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.464268809982\\n\",\n      \"Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.451838849883\\n\",\n      \"Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.501252940388\\n\",\n      \"Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.522887952293\\n\",\n      \"Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.50126994171\\n\",\n      \"Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.502592883032\\n\",\n      \"Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.4976489476\\n\",\n      \"Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.413984124197\\n\",\n      \"Auc for class 28 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: nan\\n\",\n      \"Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.485232058639\\n\",\n      \"Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.475149523707\\n\",\n      \"Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.512695371032\\n\",\n      \"Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.443107537688\\n\",\n      \"Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.473124962683\\n\",\n      \"Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\\n\",\n      \"Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.502436065161\\n\",\n      \"Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.497505395972\\n\",\n      \"Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.533997183665\\n\",\n      \"Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.516225645878\\n\",\n      \"Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.479381557424\\n\",\n      \"Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.503250547509\\n\",\n      \"Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.501472866374\\n\",\n      \"Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.511796004417\\n\",\n      \"Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.472370750781\\n\",\n      \"Auc for class 11 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.503774777488\\n\",\n      \"Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.482292660736\\n\",\n      \"Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\\n\",\n      \"Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\\n\",\n      \"Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\\n\",\n      \"Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.509781244505\\n\",\n      \"Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.511501561927\\n\",\n      \"Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.466850476392\\n\",\n      \"Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.616544907291\\n\",\n      \"Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.517680398972\\n\",\n      \"Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.48543965229\\n\",\n      \"Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498092928991\\n\",\n      \"Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.527383088967\\n\",\n      \"Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.516476102053\\n\",\n      \"Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498915515\\n\",\n      \"Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.498317975812\\n\",\n      \"Auc for class 27 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.439794843499\\n\",\n      \"Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: nan\\n\",\n      \"Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.509969175195\\n\"\n     ]\n    },\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/sklearn/metrics/ranking.py:571: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless\\n\",\n      \"  UndefinedMetricWarning)\\n\"\n     ]\n    },\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.493638808206\\n\",\n      \"Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.508299713945\\n\",\n      \"Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.457780904523\\n\",\n      \"Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.496740926496\\n\",\n      \"Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\\n\",\n      \"Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499153608357\\n\",\n      \"Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499355443456\\n\",\n      \"Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.519405656255\\n\",\n      \"Auc for class 5 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.510566062676\\n\",\n      \"Auc for class 6 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.480932677982\\n\",\n      \"Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.492101760004\\n\",\n      \"Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.521062880598\\n\",\n      \"Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.46758254629\\n\",\n      \"Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.475540747064\\n\",\n      \"Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.540092938467\\n\",\n      \"Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.486065994621\\n\",\n      \"Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\\n\",\n      \"Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\\n\",\n      \"Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\\n\",\n      \"Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.488949553253\\n\",\n      \"Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.512517147563\\n\",\n      \"Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.480352770023\\n\",\n      \"Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.524139214683\\n\",\n      \"Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.531170784555\\n\",\n      \"Auc for class 21 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.486539618857\\n\",\n      \"Auc for class 22 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.493480481944\\n\",\n      \"Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.493649014345\\n\",\n      \"Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.519584546531\\n\",\n      \"Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.502616827295\\n\",\n      \"Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.499241317853\\n\",\n      \"Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.527983296549\\n\",\n      \"Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: nan\\n\",\n      \"Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.513514238074\\n\",\n      \"Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.505267708646\\n\",\n      \"Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.526942603747\\n\",\n      \"Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.416369849246\\n\",\n      \"Auc for class 0 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.492310370551\\n\",\n      \"Auc for class 1 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\\n\",\n      \"Auc for class 2 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.510422808191\\n\",\n      \"Auc for class 3 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.497258969647\\n\",\n      \"Auc for class 4 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.533468253803\\n\",\n      \"Auc for class 5 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.51988275004\\n\",\n      \"Auc for class 6 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.446890074912\\n\",\n      \"Auc for class 7 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.470106311844\\n\",\n      \"Auc for class 8 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.480683362454\\n\",\n      \"Auc for class 9 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.519891680117\\n\",\n      \"Auc for class 10 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.498969861354\\n\",\n      \"Auc for class 11 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.49575049304\\n\",\n      \"Auc for class 12 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.496308597575\\n\",\n      \"Auc for class 13 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\\n\",\n      \"Auc for class 14 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\\n\",\n      \"Auc for class 15 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\\n\",\n      \"Auc for class 16 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.497497468669\\n\",\n      \"Auc for class 17 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.498361194233\\n\",\n      \"Auc for class 18 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.454219503411\\n\",\n      \"Auc for class 19 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.554294558911\\n\",\n      \"Auc for class 20 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.510198929845\\n\",\n      \"Auc for class 21 model 
tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.443848211301\\n\",\n      \"Auc for class 22 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.511251516464\\n\",\n      \"Auc for class 23 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.527593056506\\n\",\n      \"Auc for class 24 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.517610635095\\n\",\n      \"Auc for class 25 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.507171714086\\n\",\n      \"Auc for class 26 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.494850664384\\n\",\n      \"Auc for class 27 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.433402513042\\n\",\n      \"Auc for class 28 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: nan\\n\",\n      \"Auc for class 29 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.529500137723\\n\",\n      \"Auc for class 30 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.485269677036\\n\",\n      \"Auc for class 31 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.513662670014\\n\",\n      \"Auc for class 32 model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.429722613065\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"auc_list = []\\n\",\n    \"for _model in MODEL_NAMES:\\n\",\n    \"    for _class in CLASS_NAMES:\\n\",\n    \"        fpr, tpr, thresholds = metrics.roc_curve(\\n\",\n    \"            test_performance_df['label'] == _class,\\n\",\n    \"            test_performance_df['{}_{}'.format(_model, _class)])\\n\",\n    \"        _auc = metrics.auc(fpr, tpr)\\n\",\n    \"        auc_list.append(_auc)\\n\",\n    \"        print ('Auc for class {} model {}: {}'.format(_class, _model, 
_auc))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 55,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def get_class_from_col_name(col_name):\\n\",\n    \"    pattern = r'^.*_(\\\\d+)$'\\n\",\n    \"    return int(re.search(pattern, col_name).group(1))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 62,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def find_best_class(df, model_name, class_names):\\n\",\n    \"    model_class_names = ['{}_{}'.format(model_name, class_name) for class_name in class_names]\\n\",\n    \"    sub_df = df[model_class_names]\\n\",\n    \"    df['{}_class'.format(model_name)] = sub_df.idxmax(axis=1).apply(get_class_from_col_name)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 63,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"for _model in MODEL_NAMES:\\n\",\n    \"    find_best_class(test_performance_df, _model, CLASS_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 64,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132738: 0.0572\\n\",\n      \"Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132748: 0.0639\\n\",\n      \"Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132820: 0.0681\\n\",\n      \"Accuracy for model tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190306_132828: 0.0623\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"accuracy_list = []\\n\",\n    \"for _model in MODEL_NAMES:\\n\",\n    \"    is_correct = (test_performance_df['{}_class'.format(_model)] == test_performance_df['label'])\\n\",\n    \"    _acc = sum(is_correct)/len(is_correct)\\n\",\n    \"    accuracy_list.append(_acc)\\n\",\n    \"   
 print ('Accuracy for model {}: {}'.format(_model, _acc))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"vTrKsfIcxoBh\"\n   },\n   \"source\": [\n    \"## Unintended Bias Metrics\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"colab_type\": \"text\",\n    \"id\": \"D3ZJSKY8FHFH\"\n   },\n   \"source\": [\n    \"### Data Format\\n\",\n    \"At this point, our bias data is in DataFrame df, with columns:\\n\",\n    \"\\n\",\n    \"*   label: True if the comment is Toxic, False otherwise.\\n\",\n    \"*   < model name >: One column per model, cells contain the score from that model.\\n\",\n    \"*   < subgroup >: One column per identity, True if the comment mentions this identity.\\n\",\n    \"\\n\",\n    \"You can run the analysis below on any data in this format. Subgroup labels can be \\n\",\n    \"generated via words in the text as done above, or come from human labels if you have them.\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 35,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [\n    {\n     \"ename\": \"KeyError\",\n     \"evalue\": \"'male'\",\n     \"output_type\": \"error\",\n     \"traceback\": [\n      \"\\u001b[0;31m---------------------------------------------------------------------------\\u001b[0m\",\n      \"\\u001b[0;31mKeyError\\u001b[0m                                  Traceback (most recent call last)\",\n      \"\\u001b[0;32m<ipython-input-35-d94e49a61360>\\u001b[0m in \\u001b[0;36m<module>\\u001b[0;34m()\\u001b[0m\\n\\u001b[1;32m      1\\u001b[0m \\u001b[0midentity_terms_civil_included\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0;34m[\\u001b[0m\\u001b[0;34m]\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m      2\\u001b[0m \\u001b[0;32mfor\\u001b[0m \\u001b[0m_term\\u001b[0m \\u001b[0;32min\\u001b[0m 
\\u001b[0minput_fn_example\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0midentity_terms_civil\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m----> 3\\u001b[0;31m     \\u001b[0;32mif\\u001b[0m \\u001b[0msum\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mtest_bias_df\\u001b[0m\\u001b[0;34m[\\u001b[0m\\u001b[0m_term\\u001b[0m\\u001b[0;34m]\\u001b[0m\\u001b[0;34m)\\u001b[0m \\u001b[0;34m>=\\u001b[0m \\u001b[0;36m20\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m      4\\u001b[0m         \\u001b[0;32mprint\\u001b[0m \\u001b[0;34m(\\u001b[0m\\u001b[0;34m'keeping {}'\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mformat\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0m_term\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m      5\\u001b[0m         \\u001b[0midentity_terms_civil_included\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mappend\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0m_term\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/frame.pyc\\u001b[0m in \\u001b[0;36m__getitem__\\u001b[0;34m(self, key)\\u001b[0m\\n\\u001b[1;32m   2137\\u001b[0m             \\u001b[0;32mreturn\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_getitem_multilevel\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mkey\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   2138\\u001b[0m         \\u001b[0;32melse\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m-> 2139\\u001b[0;31m             \\u001b[0;32mreturn\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_getitem_column\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mkey\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m   
2140\\u001b[0m \\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   2141\\u001b[0m     \\u001b[0;32mdef\\u001b[0m \\u001b[0m_getitem_column\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mself\\u001b[0m\\u001b[0;34m,\\u001b[0m \\u001b[0mkey\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/frame.pyc\\u001b[0m in \\u001b[0;36m_getitem_column\\u001b[0;34m(self, key)\\u001b[0m\\n\\u001b[1;32m   2144\\u001b[0m         \\u001b[0;31m# get column\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   2145\\u001b[0m         \\u001b[0;32mif\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mcolumns\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mis_unique\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m-> 2146\\u001b[0;31m             \\u001b[0;32mreturn\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_get_item_cache\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mkey\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m   2147\\u001b[0m \\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   2148\\u001b[0m         \\u001b[0;31m# duplicate columns & possible reduce dimensionality\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/generic.pyc\\u001b[0m in \\u001b[0;36m_get_item_cache\\u001b[0;34m(self, item)\\u001b[0m\\n\\u001b[1;32m   1840\\u001b[0m         \\u001b[0mres\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mcache\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mget\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mitem\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   1841\\u001b[0m         
\\u001b[0;32mif\\u001b[0m \\u001b[0mres\\u001b[0m \\u001b[0;32mis\\u001b[0m \\u001b[0mNone\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m-> 1842\\u001b[0;31m             \\u001b[0mvalues\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_data\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mget\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mitem\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m   1843\\u001b[0m             \\u001b[0mres\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_box_item_values\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mitem\\u001b[0m\\u001b[0;34m,\\u001b[0m \\u001b[0mvalues\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   1844\\u001b[0m             \\u001b[0mcache\\u001b[0m\\u001b[0;34m[\\u001b[0m\\u001b[0mitem\\u001b[0m\\u001b[0;34m]\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mres\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/internals.pyc\\u001b[0m in \\u001b[0;36mget\\u001b[0;34m(self, item, fastpath)\\u001b[0m\\n\\u001b[1;32m   3841\\u001b[0m \\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   3842\\u001b[0m             \\u001b[0;32mif\\u001b[0m \\u001b[0;32mnot\\u001b[0m \\u001b[0misna\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mitem\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m-> 3843\\u001b[0;31m                 \\u001b[0mloc\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mitems\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mget_loc\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mitem\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m   3844\\u001b[0m             
\\u001b[0;32melse\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   3845\\u001b[0m                 \\u001b[0mindexer\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mnp\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0marange\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mlen\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mitems\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m[\\u001b[0m\\u001b[0misna\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mitems\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m]\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;32m/Users/nthain/Documents/repos/conversationai-models/model_evaluation/.venv/lib/python2.7/site-packages/pandas/core/indexes/base.pyc\\u001b[0m in \\u001b[0;36mget_loc\\u001b[0;34m(self, key, method, tolerance)\\u001b[0m\\n\\u001b[1;32m   2525\\u001b[0m                 \\u001b[0;32mreturn\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_engine\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mget_loc\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mkey\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   2526\\u001b[0m             \\u001b[0;32mexcept\\u001b[0m \\u001b[0mKeyError\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m-> 2527\\u001b[0;31m                 \\u001b[0;32mreturn\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_engine\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mget_loc\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_maybe_cast_indexer\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mkey\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m   2528\\u001b[0m \\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m   2529\\u001b[0m         \\u001b[0mindexer\\u001b[0m 
\\u001b[0;34m=\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mget_indexer\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0;34m[\\u001b[0m\\u001b[0mkey\\u001b[0m\\u001b[0;34m]\\u001b[0m\\u001b[0;34m,\\u001b[0m \\u001b[0mmethod\\u001b[0m\\u001b[0;34m=\\u001b[0m\\u001b[0mmethod\\u001b[0m\\u001b[0;34m,\\u001b[0m \\u001b[0mtolerance\\u001b[0m\\u001b[0;34m=\\u001b[0m\\u001b[0mtolerance\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;32mpandas/_libs/index.pyx\\u001b[0m in \\u001b[0;36mpandas._libs.index.IndexEngine.get_loc\\u001b[0;34m()\\u001b[0m\\n\",\n      \"\\u001b[0;32mpandas/_libs/index.pyx\\u001b[0m in \\u001b[0;36mpandas._libs.index.IndexEngine.get_loc\\u001b[0;34m()\\u001b[0m\\n\",\n      \"\\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\\u001b[0m in \\u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\\u001b[0;34m()\\u001b[0m\\n\",\n      \"\\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\\u001b[0m in \\u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\\u001b[0;34m()\\u001b[0m\\n\",\n      \"\\u001b[0;31mKeyError\\u001b[0m: 'male'\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"identity_terms_civil_included = []\\n\",\n    \"for _term in input_fn_example.identity_terms_civil:\\n\",\n    \"    if sum(test_bias_df[_term]) >= 20:\\n\",\n    \"        print ('keeping {}'.format(_term))\\n\",\n    \"        identity_terms_civil_included.append(_term)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"test_bias_df['model_1'] = test_bias_df['tf_gru_attention_civil:v_20181109_164318']\\n\",\n    \"test_bias_df['model_2'] = test_bias_df['tf_gru_attention_civil:v_20181109_164403']\\n\",\n    \"test_bias_df['model_3'] = test_bias_df['tf_gru_attention_civil:v_20181109_164535']\\n\",\n    \"test_bias_df['model_4'] = test_bias_df['tf_gru_attention_civil:v_20181109_164630']\"\n   ]\n  },\n  
{\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"MODEL_NAMES = ['model_1', 'model_2', 'model_3', 'model_4']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"bias_metrics = model_bias_analysis.compute_bias_metrics_for_models(test_bias_df, identity_terms_civil_included, MODEL_NAMES, 'label')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model_bias_analysis.plot_auc_heatmap(bias_metrics, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model_bias_analysis.plot_aeg_heatmap(bias_metrics, MODEL_NAMES)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"colab\": {\n   \"collapsed_sections\": [],\n   \"default_view\": {},\n   \"name\": \"jigsaw-evaluation-pipeline.ipynb\",\n   \"provenance\": [],\n   \"version\": \"0.3.2\",\n   \"views\": {}\n  },\n  \"kernelspec\": {\n   \"display_name\": \"models_eval\",\n   \"language\": \"python\",\n   \"name\": \"models_eval\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 2\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython2\",\n   \"version\": \"2.7.10\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 1\n}\n"
  },
  {
    "path": "model_evaluation/requirements.txt",
    "content": "google-api-python-client==1.7.3\nMarkdown==2.6.11\nnltk==3.9\nnumpy==1.22.0\npandas==0.22.0\nrequests==2.32.2\nseaborn==0.8.1\nscikit-learn==0.19.1\nscipy==1.10.0\nsklearn==0.0\nsix==1.11.0\ntensorflow==2.12.1\njupyter==1.0.0\nmatplotlib==2.0.2\nnltk==3.9\n\n"
  },
  {
    "path": "model_evaluation/score_bias_data.sh",
    "content": "#!/bin/bash\n\nMODEL_NAMES='tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113247,'\\\n'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113241,'\\\n'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113114,'\\\n'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113106,'\\\n'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_163707,'\\\n'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_163723'\n\nCLASS_NAMES='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32'\nTEST_DATA='biasbios'\nOUTPUT_PATH='gs://conversationai-models/biosbias/scored_data/standard_test.csv'\n\necho \"\"\"\nRunning...\n\npython score_test_data.py \\\\\n --model_names=$MODEL_NAMES \\\\\n --class_names=$CLASS_NAMES \\\\\n --test_data=$TEST_DATA \\\\\n --output_path=$OUTPUT_PATH\n\"\"\"\n\npython score_test_data.py \\\n --model_names=$MODEL_NAMES \\\n --class_names=$CLASS_NAMES \\\n --test_data=$TEST_DATA \\\n --output_path=$OUTPUT_PATH"
  },
  {
    "path": "model_evaluation/score_scrubbed_data.sh",
    "content": "#!/bin/bash\n\nMODEL_NAMES='tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_113045,'\\\n'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954'\n\nCLASS_NAMES='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32'\nTEST_DATA='scrubbed_biasbios'\nOUTPUT_PATH='gs://conversationai-models/biosbias/scored_data/scrubbed_test.csv'\n\necho \"\"\"\nRunning...\n\npython score_test_data.py \\\\\n --model_names=$MODEL_NAMES \\\\\n --class_names=$CLASS_NAMES \\\\\n --test_data=$TEST_DATA \\\\\n --output_path=$OUTPUT_PATH\n\"\"\"\n\npython score_test_data.py \\\n --model_names=$MODEL_NAMES \\\n --class_names=$CLASS_NAMES \\\n --test_data=$TEST_DATA \\\n --output_path=$OUTPUT_PATH"
  },
  {
    "path": "model_evaluation/score_test_data.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Convenience script to score some data with CMLE models.\"\"\"\n\nimport getpass\nimport nltk\nimport os\nimport pandas as pd\nimport random\nimport tensorflow as tf\n\nimport input_fn_example\nfrom utils_export.dataset import Dataset, Model\nfrom utils_export import utils_cloudml\nfrom utils_export import utils_tfrecords\n\ntf.app.flags.DEFINE_string(\n    'model_names', None, 'Comma separated list of model names deployed on ML Engine.')\ntf.app.flags.DEFINE_string(\n    'class_names', None, 'Comma separated list of class names to evaluate.')\ntf.app.flags.DEFINE_string('test_data', None,\n                           'Test data to evaluate on. 
Must correspond to one in input_fn_example.py.')\ntf.app.flags.DEFINE_string('output_path', None,\n                           'Path to write scored test data.')\ntf.app.flags.DEFINE_string('project_name', 'conversationai-models',\n                           'Name of GCS project.')\ntf.app.flags.DEFINE_string('text_feature_name', 'tokens',\n                           'Name of the text feature (see serving function call in run.py).')\ntf.app.flags.DEFINE_string('sentence_key', 'comment_key',\n                           'Name of input key (see serving function call in run.py).')\ntf.app.flags.DEFINE_string('prediction_name', 'probabilities',\n                           'Name of output prediction.')\ntf.app.flags.DEFINE_integer('dataset_size', 100000,\n                            'Maximum size of dataset to score.')\n\nFLAGS = tf.app.flags.FLAGS\n\n\ndef get_input_fn(test_data, tokenizer, model_input_comment_field):\n  if test_data == 'biasbios':\n    return input_fn_example.create_input_fn_biasbios(tokenizer,\n                                                     model_input_comment_field)\n  elif test_data == 'scrubbed_biasbios':\n    return input_fn_example.create_input_fn_biasbios(tokenizer,\n                                                     model_input_comment_field,\n                                                     scrubbed=True)\n  else:\n    raise ValueError('Dataset not currently supported.')\n\n\ndef tokenizer(text, lowercase=True):\n  \"\"\"Converts text to a list of words.\n\n  Args:\n    text: piece of text to tokenize (string).\n    lowercase: whether to include lowercasing in preprocessing (boolean).\n    tokenizer: Python function to tokenize the text on.\n\n  Returns:\n    A list of strings (words).\n  \"\"\"\n  words = nltk.word_tokenize(text.decode('utf-8'))\n  if lowercase:\n    words = [w.lower() for w in words]\n  return words\n\n\ndef score_data(model_names,\n               class_names,\n               test_data,\n               
output_path,\n               project_name,\n               text_feature_name,\n               sentence_key,\n               prediction_name,\n               dataset_size):\n  \"\"\"Scores a test dataset with ML engine models and writes output as csv.\n\n  Args:\n    model_names: list of model names deployed on ML Engine.\n    class_names: list of class names to evaluate.\n    test_data: test data to evaluate on, must be defined in get_input_fn.\n    output_path: path to write scored test data.\n    project_name: name of Google Cloud project.\n    text_feature_name: name of the text feature (see serving function call in run.py).\n    sentence_key: name of input key (see serving function call in run.py).\n    prediction_name: name of output prediction.\n    dataset_size: maximum size of dataset to score.\n  \"\"\"\n  os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0' #Faster to access GCS file + https://github.com/tensorflow/tensorflow/issues/15530\n  nltk.download('punkt')\n\n  # Load data.\n  input_fn = get_input_fn(test_data,\n    tokenizer,\n    model_input_comment_field=text_feature_name,\n    )\n  performance_dataset_dir = os.path.join(\n      'gs://conversationai-models/',\n      getpass.getuser(),\n      'tfrecords',\n      'performance_dataset_dir_3')\n\n  dataset = Dataset(input_fn, performance_dataset_dir)\n  random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future\n\n  # Define and call model.\n  model_input_spec = {\n      text_feature_name: utils_tfrecords.EncodingFeatureSpec.LIST_STRING} #library will use this automatically\n  dataset.load_data(dataset_size, random_filter_keep_rate=0.5)\n  model = Model(\n      feature_keys_spec=model_input_spec,\n      prediction_keys=prediction_name,\n      example_key=sentence_key,\n      model_names=model_names,\n      project_name=project_name)\n  dataset.add_model_prediction_to_data(model, recompute_predictions=True, class_names=class_names)\n  \n  # Save data.\n  
scored_test_df = dataset.show_data()\n  scored_test_df.to_csv(tf.gfile.Open(output_path, 'w'), index = False)\n\nif __name__ == \"__main__\":\n  tf.logging.set_verbosity(tf.logging.INFO)\n\n  model_names = [name.strip() for name in FLAGS.model_names.split(',')]\n  print(model_names)\n  class_names = [name.strip() for name in FLAGS.class_names.split(',')]\n  print(class_names)\n  score_data(model_names,\n             class_names,\n             FLAGS.test_data,\n             FLAGS.output_path,\n             FLAGS.project_name,\n             FLAGS.text_feature_name,\n             FLAGS.sentence_key,\n             FLAGS.prediction_name,\n             FLAGS.dataset_size)\n"
  },
  {
    "path": "model_evaluation/utils_export/__init__.py",
    "content": ""
  },
  {
    "path": "model_evaluation/utils_export/dataset.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Defines the dataset structure for evaluation pipeline.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport inspect\nimport os\n\nimport pandas as pd\nimport tensorflow as tf\nfrom tensorflow.python.platform import tf_logging as logging\n\nimport utils_export.utils_cloudml as utils_cloudml\nimport utils_export.utils_tfrecords as utils_tfrecords\n\n# Quota for concurrent prediction jobs\nCMLE_QUOTA_PREDICTION = 7\n\n\nclass Model(object):\n  \"\"\"Defines the spec of a CMLE Model.\n\n    All models (given by `model_names`) need to share the feature_keys_spec,\n      example_key and prediction_keys.\n    Those fields define the inputs (feature_keys_spec, example_key) and output\n      of the models.\n    \"\"\"\n\n  def __init__(self,\n               feature_keys_spec,\n               prediction_keys,\n               model_names,\n               project_name,\n               example_key='example_key'):\n    \"\"\"Initializes a model and defines its signature.\n\n    Args:\n      feature_keys_spec: spec of the tf_records input to the model.\n      prediction_keys: Name of the keys to extract from model outputs.\n      model_names: List of names of the model in Cloud MLE.\n      
  Format should be $MODEL_NAME:$VERSION. If no version given, will take\n          default version.\n      project_name: name of the gcp project.\n      example_key: name of the example key expected by the model.\n\n    Raises:\n      ValueError: If example_key is included in the feature_spec\n        of if feature_keys_spec does not match required format.\n\n    Note: When used with `Dataset`, the dataframe returned by the input_fn\n      should not contain the `example_key`, as it will be later created by the API.\n    \"\"\"\n\n    utils_tfrecords.is_valid_spec(feature_keys_spec)\n    if example_key in feature_keys_spec:\n      raise ValueError('example_key should not be part of input_data.'\n                       'It will be created when writing to tf-records')\n    self._model_name = model_names\n    self._feature_keys_spec = feature_keys_spec\n    self._prediction_keys = prediction_keys\n    self._project_name = project_name\n    self._example_key = example_key\n\n  def feature_keys_spec(self):\n    return self._feature_keys_spec\n\n  def example_key(self):\n    return self._example_key\n\n  def model_names(self):\n    return self._model_name\n\n  def prediction_keys(self):\n    return self._prediction_keys\n\n  def project_name(self):\n    return self._project_name\n\n  def set_job_ids_prediction(self, job_ids):\n    self._job_ids_prediction = job_ids\n\n  def job_ids_prediction(self):\n    return self._job_ids_prediction\n\n\nclass Dataset(object):\n  \"\"\"Defines a format for every dataset to work with evaluation pipeline.\n\n  Usage:\n\n  input_fn = ... 
(returns pandas DataFrame).\n  dataset = Dataset(input_fn, dataset_dir) # Verifies that input_fn is ok.\n\n  dataset.load_data(10000)\n\n  model = Model(...)\n  # Next function verifies that models are compatible.\n  dataset.add_model_prediction_to_data(model)\n\n  dataset.show_data()\n  \"\"\"\n\n  def __init__(self, input_fn, dataset_dir):\n    \"\"\"Initialises a `Dataset` instance.\n\n    Args:\n      input_fn: function that returns a pandas `Dataframe`.\n      dataset_dir: Directory where to save the temporary files, in particular\n        tf_records inputs and outputs of CMLE.\n    \"\"\"\n    self.check_input_fn(input_fn)\n    self._input_fn = input_fn\n    self._dataset_dir = dataset_dir\n\n  def show_data(self):\n    if not hasattr(self, 'data'):\n      raise ValueError('Dataset does not have data yet.'\n                       ' You need to run `load_data` first.')\n    return self.data\n\n  def check_input_fn(self, input_fn):\n    \"\"\"Checks if the input_fn meets requirements.\"\"\"\n    args_input_fn = inspect.getargspec(input_fn).args\n    if 'max_n_examples' not in args_input_fn:\n      raise ValueError('input_fn should have (at least) `max_n_examples`'\n                       ' as arguments.')\n\n    loaded_data = input_fn(max_n_examples=1)\n\n    if not isinstance(loaded_data, pd.DataFrame):\n      raise ValueError('input_fn should return a pandas DataFrame.')\n\n    if len(loaded_data) != 1:\n      raise ValueError(\n          'input_fn(max_n_examples=1) should contain 1 row (exactly).')\n    logging.info('input_fn is compatible with the `Dataset` class.')\n\n  def check_compatibility(self, model):\n    \"\"\"Checks that input_fn is compatible with the model.\"\"\"\n\n    if hasattr(self, 'data'):\n      test_df = self.data\n    else:\n      test_df = self._input_fn(max_n_examples=1)\n\n    for key in model.feature_keys_spec():\n      if key not in test_df.columns:\n        raise ValueError(\n            'input_fn must contain at least the feature 
keys {}'.format(\n                model.feature_keys_spec()))\n    logging.info('Model is compatible with the `Dataset` instance.')\n\n  def load_data(self, max_n_examples, **kwargs):\n    self.data = self._input_fn(max_n_examples=max_n_examples, **kwargs)\n\n  def get_path_input_tf(self):\n    \"\"\"Returns the path to input tf-records (input of CMLE).\"\"\"\n    name = 'input_data.tfrecords'\n    input_path = os.path.join(self._dataset_dir, name)\n    return input_path\n\n  def get_path_prediction(self, model_name):\n    \"\"\"Returns the path to prediction files (output of CMLE).\"\"\"\n    name = 'prediction_data_{}'.format(model_name)\n    prediction_path = os.path.join(self._dataset_dir, name)\n    return prediction_path\n\n  def convert_data_to_tf(self, feature_keys_spec, example_key, overwrite=True):\n    \"\"\"Writes self.data to tf-records.\n\n    Args:\n      feature_keys_spec: the spec of the feature_keys. Only those fields will be\n        written to tf-records.\n      example_key: Name of the field for example_key. 
The key will be generated\n        on the fly.\n      overwrite: Whether to overwrite the existing tf_records.\n\n    Raises:\n      ValueError: if dataset does not have data loaded.\n    \"\"\"\n\n    if not hasattr(self, 'data'):\n      raise ValueError('Dataset does not have data yet.'\n                       ' You need to run `load_data` first.')\n\n    path_input_tf = self.get_path_input_tf()\n    if tf.gfile.Exists(path_input_tf):\n      if overwrite:\n        logging.info('TF-Records already exist - overwriting them.')\n      else:\n        logging.info('TF-Records already exist - We will use those.')\n        return\n\n    utils_tfrecords.encode_pandas_to_tfrecords(self.data, feature_keys_spec,\n                                               path_input_tf, example_key)\n\n  def call_prediction(self, model):\n    \"\"\"Starts a CMLE batch prediction job for the model.\"\"\"\n\n    path_input_tf = self.get_path_input_tf()\n    if not tf.gfile.Exists(path_input_tf):\n      raise ValueError('Dataset does not have input_tf_records yet.'\n                       ' You need to run `convert_data_to_tf` first.')\n    \n    if len(model.model_names()) > CMLE_QUOTA_PREDICTION:\n      raise ValueError('Model should not contain more than {} versions.'\n                       ' If you need more, split the version into two'\n                       ' different models.'.format(CMLE_QUOTA_PREDICTION))\n\n    job_ids = []\n    for model_name_full in model.model_names():\n\n      model_name_split = model_name_full.split(':')\n      model_name = model_name_split[0]\n      if len(model_name_split) > 1:\n        version = model_name_split[1]\n      else:\n        version = None\n\n      output_pred_path = self.get_path_prediction(model_name_full)\n      job_id = utils_cloudml.call_model_predictions_from_df(\n          project_name=model.project_name(),\n          input_tf_records=path_input_tf,\n          output_prediction_path=output_pred_path,\n          model_name=model_name,\n 
         version_name=version)\n      job_ids.append(job_id)\n    model.set_job_ids_prediction(job_ids)\n\n  def collect_prediction(self, model, class_names):\n    \"\"\"Collects the predictions of CMLE jobs and adds it to dataframe.\"\"\"\n\n    for model_name in model.model_names():\n      tf_record_prediction = self.get_path_prediction(model_name)\n      self.data = utils_cloudml.add_model_predictions_to_df(\n          self.data,\n          prediction_file=tf_record_prediction,\n          model_col_name=model_name,\n          prediction_name=model.prediction_keys(),\n          example_key=model.example_key(),\n          class_names=class_names)\n\n  def wait_predictions(self, model):\n    \"\"\"Loops until the prediction jobs of the model completed.\"\"\"\n\n    if not hasattr(model, 'job_ids_prediction'):\n      raise ValueError(\n          'Model does not have any `job_ids_prediction`.'\n          ' You need to run `call_prediction` for CMLE batch prediction job.')\n\n    for job_id in model.job_ids_prediction():\n      utils_cloudml.check_job_over(model.project_name(), job_id)\n\n  def add_model_prediction_to_data(self, model, recompute_predictions=True, class_names=None):\n    \"\"\"Computes the prediction of the model and adds it to dataframe.\n\n    Args:\n      model: a `Model` instance.\n      recompute_predictions: Indicates if we run predictions (batch prediction\n        job) or if we load past prediction files. 
If using past predictions (when\n        False), the data must match exactly (same number of lines and in the same\n        order).\n      class_names (optional): If the model is a multiclass model, you can specify class names.\n          The model will then return a logit value per class instead of a single value.\n    \"\"\"\n    def _compute_predictions_less_than_quota(self, model, need_to_convert_data=True):\n      \"\"\"Runs predictions for a model that has less than $QUOTA versions.\"\"\"\n      if need_to_convert_data:\n        self.convert_data_to_tf(model.feature_keys_spec(), model.example_key())\n      self.call_prediction(model)\n      self.wait_predictions(model)\n\n    self.check_compatibility(model)\n\n    if recompute_predictions:\n\n      num_epochs = int(len(model.model_names()) / CMLE_QUOTA_PREDICTION)\n      for i in range(0, num_epochs + 1):\n        logging.info('Doing batch {}/{}'.format(i, num_epochs))\n        min_index = i*CMLE_QUOTA_PREDICTION\n        max_index = min((i + 1) * CMLE_QUOTA_PREDICTION, len(model.model_names()))\n        sub_model_names = model.model_names()[min_index:max_index]\n        sub_model = Model(\n          model.feature_keys_spec(),\n          model.prediction_keys(),\n          sub_model_names,\n          model.project_name(),\n          model.example_key())\n        need_to_convert_data = (i == 0)\n        _compute_predictions_less_than_quota(self, sub_model, need_to_convert_data)\n\n    else:\n      logging.warning(\n          'Using past predictions. '\n          'the data must match exactly (same number of lines and same order).')\n\n    self.collect_prediction(model, class_names)\n"
  },
  {
    "path": "model_evaluation/utils_export/dataset_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for dataset.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport getpass\nimport os\nimport time\nimport unittest\n\nfrom dataset import Dataset\nfrom dataset import Model\nimport pandas as pd\nfrom utils_tfrecords import EncodingFeatureSpec\n\n\nclass TestCompatibleInputFn(unittest.TestCase):\n  \"\"\"Verifies the compatibility of input_fn with `Dataset`.\"\"\"\n\n  def testCorrect(self):\n\n    def input_fn(max_n_examples):\n      return pd.DataFrame({\n          'comment_text': ['This is one'] * max_n_examples,\n          'label_name': [0] * max_n_examples\n      })\n\n    try:\n      Dataset(input_fn, 'dataset_dir')\n    except ValueError:\n      self.fail('Dataset raised an exception unexpectedly!')\n\n  def testWrongArgInputFn(self):\n\n    def input_fn(other_args=1.0):\n      assert other_args\n      return {'other_feature': ['This is one'], 'label_name': [0]}\n\n    with self.assertRaises(Exception) as context:\n      Dataset(input_fn, 'dataset_dir')\n      self.assertIn('input_fn should have (at least) `max_n_examples`',\n                    str(context.exception))\n\n  def testInputFnWrongType(self):\n\n    def input_fn(max_n_examples):\n      return {\n     
     'other_feature': ['This is one'] * max_n_examples,\n          'label_name': [0] * max_n_examples\n      }\n\n    with self.assertRaises(Exception) as context:\n      Dataset(input_fn, 'dataset_dir')\n      self.assertIn('input_fn should return a pandas DataFrame.',\n                    str(context.exception))\n\n  def testWrongNumberOfLines(self):\n\n    def input_fn(max_n_examples=1):\n      assert max_n_examples\n      return pd.DataFrame({\n          'comment_text': ['This is one'] * 2,\n          'label_name': [0] * 2\n      })\n\n    with self.assertRaises(Exception) as context:\n      Dataset(input_fn, 'dataset_dir')\n      self.assertIn(\n          'input_fn(max_n_examples=1) should contain 1 row (exactly).',\n          str(context.exception))\n\n\nclass TestModelCompatibleWithInputFn(unittest.TestCase):\n  \"\"\"Verifies the compatibility between input_fn and model.\"\"\"\n\n  def testBadTypeFeatureKeys(self):\n\n    with self.assertRaises(Exception) as context:\n      model = Model(\n          feature_keys_spec='comment_text',\n          prediction_keys='prediction_key',\n          model_names='None',\n          project_name=None)\n      self.assertIn('Spec should be a dictionary', str(context.exception))\n\n  def testInputFnMissingFeatureKeys(self):\n\n    model = Model(\n        feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},\n        prediction_keys='prediction_key',\n        model_names='None',\n        project_name=None)\n\n    def input_fn(max_n_examples):\n      return pd.DataFrame({\n          'other_feature': ['This is one'] * max_n_examples,\n          'label_name': [0] * max_n_examples\n      })\n\n    with self.assertRaises(Exception) as context:\n      dataset = Dataset(input_fn, 'dataset_dir')\n      dataset.check_compatibility(model)\n      self.assertIn('input_fn must contain at least the feature keys',\n                    str(context.exception))\n\n  def testModelIsCompatibleWithDataset(self):\n    model = 
Model(\n        feature_keys_spec={'comment_text': EncodingFeatureSpec.LIST_STRING},\n        prediction_keys='prediction_key',\n        model_names='None',\n        project_name=None)\n\n    def input_fn(max_n_examples):\n      return pd.DataFrame({\n          'comment_text': ['This is one'] * max_n_examples,\n          'label_name': [0] * max_n_examples\n      })\n\n    try:\n      dataset = Dataset(input_fn, 'dataset_dir')\n      dataset.check_compatibility(model)\n    except ValueError:\n      self.fail('Dataset raised an exception unexpectedly!')\n\n\nclass TestEndPipeline(unittest.TestCase):\n  \"\"\"Verifies end-to-end use of dataset.\"\"\"\n\n  test_version = str(int(time.time()))\n\n  def setUp(self):\n\n    def input_fn_test(max_n_examples):\n      return pd.DataFrame(\n          {'comment_text': [['This', 'is', 'one']] * max_n_examples})\n\n    gcs_path_test = os.path.join('gs://kaggle-model-experiments/',\n                                 getpass.getuser(), 'unittest', 'dataset_test',\n                                 TestEndPipeline.test_version)\n    self.dataset = Dataset(input_fn_test, gcs_path_test)\n    self.dataset.load_data(5)\n\n    model_input_spec = {\n        'comment_text': EncodingFeatureSpec.LIST_STRING,\n    }\n    self.model = Model(\n        feature_keys_spec=model_input_spec,\n        prediction_keys='frac_neg/logistic',\n        example_key='comment_key',\n        model_names=[\n            'tf_gru_attention:v_20180914_163804',\n            'tf_gru_attention:v_20180823_133625'\n        ],\n        project_name='wikidetox')\n\n  def testComputePredictions(self):\n    try:\n      self.dataset.add_model_prediction_to_data(self.model)\n    except ValueError:\n      self.fail('Dataset raised an exception unexpectedly!')\n\n  def testLoadPredictions(self):\n    try:\n      self.dataset.add_model_prediction_to_data(\n          self.model, recompute_predictions=False)\n    except ValueError:\n      self.fail('Dataset raised an exception 
unexpectedly!')\n\n\nif __name__ == '__main__':\n  unittest.main()\n"
  },
  {
    "path": "model_evaluation/utils_export/deploy_list_models.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Deploys all models that have been saved in a list of directories.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport argparse\nimport datetime\nimport os\nimport sys\nimport time\n\nfrom googleapiclient import discovery\nfrom googleapiclient import errors\nimport tensorflow as tf\nfrom tensorflow.python.lib.io import file_io\nfrom tensorflow.python.platform import tf_logging as logging\n\n# Maximum number of version that can be created concurrently.\nCLOUD_ML_VERSION_CREATE_QUOTA = 10\n\n\ndef get_list_models_to_export(parent_model_dir):\n  \"\"\"Gets the paths of all models that are in parent_model_dir.\"\"\"\n  _list = []\n  for subdirectory, _, files in tf.gfile.Walk(parent_model_dir):\n    if 'saved_model.pb' in files:  # Indicator of a saved model.\n      _list.append(subdirectory)\n  return _list\n\n\ndef check_model_exists(project_name, model_name):\n  \"\"\"Verifies if a model name is deployed already on CMLE.\"\"\"\n  ml = discovery.build('ml', 'v1')\n\n  model_id = 'projects/{}/models/{}'.format(project_name, model_name)\n  request = ml.projects().models().get(name=model_id)\n  try:\n    response = request.execute()\n    return True\n  except:\n    return 
False\n\n\ndef create_model(project_name, model_name):\n  \"\"\"Creates a model on CMLE.\"\"\"\n  ml = discovery.build('ml', 'v1')\n\n  request_dict = {'name': model_name}\n  project_id = 'projects/{}'.format(project_name)\n  request = ml.projects().models().create(parent=project_id, body=request_dict)\n  try:\n    response = request.execute()\n  except errors.HttpError as err:\n    raise ValueError('There was an error creating the model.' +\n                     ' Check the details: {}'.format(err._get_reason()))\n\n\ndef create_version(project_name, model_name, version_name, model_dir):\n  \"\"\"Creates a version of a model on CMLE.\"\"\"\n\n  ml = discovery.build('ml', 'v1')\n  request_dict = {\n      'name': version_name,\n      'deploymentUri': model_dir,\n      'runtimeVersion': '1.10'\n  }\n  model_id = 'projects/{}/models/{}'.format(project_name, model_name)\n  request = ml.projects().models().versions().create(\n      parent=model_id, body=request_dict)\n\n  try:\n    response = request.execute()\n    operation_id = response['name']\n    return operation_id\n\n  except errors.HttpError as err:\n    raise ValueError('There was an error creating the version.' +\n                     ' Check the details:'.format(err._get_reason()))\n\n\ndef check_version_deployed(operation_id):\n  \"\"\"Loops until the version has been deployed on CMLE.\"\"\"\n\n  ml = discovery.build('ml', 'v1')\n  request = ml.projects().operations().get(name=operation_id)\n\n  done = False\n  while not done:\n    response = None\n    time.sleep(0.3)\n    try:\n      response = request.execute()\n      done = response.get('done', False)\n    except errors.HttpError as err:\n      raise ValueError('There was an error getting the operation.' 
+\n                       ' Check the details: {}'.format(err._get_reason()))\n      done = True\n\n\ndef deploy_model_version(project_name, model_name, version_name, model_dir):\n  \"\"\"Deploys one TF model on CMLE.\n\n  Args:\n    project_name: Name of a CMLE project.\n    model_name: Name of the model to deploy. If it does not exist yet, the model\n      will be created.\n    version_name: Version of the model on CMLE.\n    Model_dir: Where to find the exported model.\n  \"\"\"\n\n  if not check_model_exists(project_name, model_name):\n    create_model(project_name, model_name)\n  operation_id = create_version(project_name, model_name, version_name,\n                                model_dir)\n  return operation_id\n\n\ndef _get_version_name(model_dir, go_up_3=True):\n  \"\"\"Looks for the version_name in the model_directory name.\n  \n  Example: model_dir = gs://.../20190328_103329/model_dir/102500/1553798665/\n    If go_up_3, it will grab '20190328_103329'\n    if not, it will grab '1553798665'.\n  Typically speaking, set up go_up_3=False if a model_run has several exported models.\"\"\"\n  if go_up_3:\n    name = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(model_dir))))\n    return 'v_{}'.format(os.path.basename(name))\n  else:\n    return 'v_{}'.format(os.path.basename(os.path.dirname(model_dir)))\n\n\ndef deploy_all_models(list_model_dir, project_name, model_name):\n  \"\"\"Finds and deploys all models present a list of directories.\n\n  Args:\n    list_model_dir: List of directories to explore.\n    project_name: Name of the project.\n    model_name: Name of the model. 
All the models found in the parent_dir will be\n      saved within the same main model.\n  \"\"\"\n\n  models = []\n  for _model_dir in args.list_model_dir.split(','):\n    models.extend(get_list_models_to_export(_model_dir))\n  logging.info('Exploration finished: {} models detected.'.format(\n      len(models)))\n\n  num_epochs = int(len(models) / CLOUD_ML_VERSION_CREATE_QUOTA)\n  for i in range(0, num_epochs + 1):\n    indices = range(i * CLOUD_ML_VERSION_CREATE_QUOTA,\n                    (i + 1) * CLOUD_ML_VERSION_CREATE_QUOTA)\n    operation_id_list = []\n    for j in indices:\n      if j >= len(models):\n        break\n      version_name = _get_version_name(models[j])\n      operation_id = deploy_model_version(\n          project_name=project_name,\n          model_name=model_name,\n          version_name=version_name,\n          model_dir=models[j])\n      operation_id_list.append(operation_id)\n\n    logging.info('Waiting for versions to be deployed...')\n    for operation_id in operation_id_list:\n      check_version_deployed(operation_id)\n\n  logging.info('DONE. {} models have been deployed'.format(len(models)))\n\n\nif __name__ == '__main__':\n\n  parser = argparse.ArgumentParser()\n  parser.add_argument(\n      '--list_model_dir',\n      help='List of the model directory (comma separated).',\n      required=True\n  )\n  parser.add_argument(\n      '--project_name', help='Name of GCP project.', default='conversationai-models')\n  parser.add_argument(\n      '--model_name',\n      help='Name of the model on CMLE.',\n      default='tf_test')\n  args = parser.parse_args(args=sys.argv[1:])\n\n  tf.logging.set_verbosity(tf.logging.INFO)\n\n  deploy_all_models(args.list_model_dir, args.project_name, args.model_name)\n"
  },
  {
    "path": "model_evaluation/utils_export/utils_cloudml.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Defines some utilities to use cloud MLE batch prediction jobs.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport datetime\nimport json\nimport os\nimport re\nimport time\n\nimport googleapiclient.discovery as discovery\nimport googleapiclient.errors as errors\nimport tensorflow as tf\nfrom tensorflow.python.lib.io import file_io\nfrom tensorflow.python.platform import tf_logging as logging\n\n\ndef call_model_predictions_from_df(project_name,\n                                   input_tf_records,\n                                   output_prediction_path,\n                                   model_name,\n                                   version_name=None):\n  \"\"\"Calls a prediction job.\n\n  Args:\n    project_name: gcp project name.\n    input_tf_records: gcs path to input tf_records.\n    output_prediction_path: gcs path to store tf_records, which will be outputs\n      to batch prediction job.\n    model_name: Model name used to run predictions. The model must take as\n      inputs TF-Records with fields $TEXT_FEATURE_NAME and $SENTENCE_KEY, and\n      should return a dictionary including the field $LABEL_NAME.\n    version_name: Model version to run predictions. 
If None, it will use default\n      version of the model.\n\n  Returns:\n    job_id: the job_id of the prediction job.\n\n  Raises:\n    ValueError: if input_tf_records does not exist.\n  \"\"\"\n\n  # Create tf-records if necessary.\n  if not file_io.file_exists(input_tf_records):\n    raise ValueError('tf_records do not exist.')\n\n  # Call batch prediction job.\n  job_id = _call_batch_job(\n      project_name,\n      input_paths=input_tf_records,\n      output_path=output_prediction_path,\n      model_name=model_name,\n      version_name=version_name)\n\n  return job_id\n\n\ndef _call_batch_job(project_name,\n                    input_paths,\n                    output_path,\n                    model_name,\n                    version_name=None):\n  \"\"\"Calls a batch prediction job on Cloud MLE.\"\"\"\n\n  batch_predict_body = _make_batch_job_body(\n      project_name,\n      input_paths,\n      output_path,\n      model_name,\n      version_name=version_name)\n\n  project_id = 'projects/{}'.format(project_name)\n\n  ml = discovery.build('ml', 'v1')\n  request = ml.projects().jobs().create(\n      parent=project_id, body=batch_predict_body)\n\n  try:\n    response = request.execute()\n    logging.info('state : {}'.format(response['state']))\n    return response['jobId']\n\n  except errors.HttpError as err:\n    # Something went wrong, print out some information.\n    logging.info('There was an error getting the prediction results.'\n                 'Check the details:')\n    logging.info(err._get_reason())\n\n\ndef _make_batch_job_body(project_name,\n                         input_paths,\n                         output_path,\n                         model_name,\n                         region='us-central1',\n                         data_format='TF_RECORD',\n                         version_name=None,\n                         max_worker_count=None,\n                         runtime_version=None):\n  \"\"\"Creates the request body for Cloud MLE batch 
prediction job.\"\"\"\n\n  project_id = 'projects/{}'.format(project_name)\n  model_id = '{}/models/{}'.format(project_id, model_name)\n  if version_name:\n    version_id = '{}/versions/{}'.format(model_id, version_name)\n\n  # Make a jobName of the format \"model_name_batch_predict_YYYYMMDD_HHMMSS\"\n  timestamp = time.strftime('%Y%m%d_%H%M%S', time.gmtime())\n\n  # Make sure the project name is formatted correctly to work as the basis\n  # of a valid job name.\n  clean_project_name = re.sub(r'\\W+', '_', project_name)\n\n  job_id = '{}_{}_{}'.format(clean_project_name, model_name, timestamp)\n\n  # Start building the request dictionary with required information.\n  body = {\n      'jobId': job_id,\n      'predictionInput': {\n          'dataFormat': data_format,\n          'inputPaths': input_paths,\n          'outputPath': output_path,\n          'region': region\n      }\n  }\n\n  # Use the version if present, the model (its default version) if not.\n  if version_name:\n    body['predictionInput']['versionName'] = version_id\n  else:\n    body['predictionInput']['modelName'] = model_id\n\n  # Only include a maximum number of workers or a runtime version if specified.\n  # Otherwise let the service use its defaults.\n  if max_worker_count:\n    body['predictionInput']['maxWorkerCount'] = max_worker_count\n\n  if runtime_version:\n    body['predictionInput']['runtimeVersion'] = runtime_version\n\n  return body\n\n\ndef check_job_over(project_name, job_name):\n  \"\"\"Sleeps until the batch job is over.\"\"\"\n\n  ml = discovery.build('ml', 'v1')\n  request = ml.projects().jobs().get(\n      name='projects/{}/jobs/{}'.format(project_name, job_name))\n  job_completed = False\n  k = 0\n  start_time = datetime.datetime.now()\n  while not job_completed:\n    response = request.execute()\n    job_completed = (response['state'] == 'SUCCEEDED')\n    if not job_completed:\n      if not (k % 5):\n        time_spent = int(\n            (datetime.datetime.now() - 
start_time).total_seconds() / 60)\n        logging.info(\n            'Waiting for prediction job to complete. Minutes elapsed: {}'\n            .format(time_spent))\n      time.sleep(30)\n    k += 1\n\n  logging.info('Prediction job completed.')\n\n\ndef add_model_predictions_to_df(df, prediction_file, model_col_name,\n                                prediction_name, example_key, class_names):\n  \"\"\"Loads the prediction files and adds the model scores to a DataFrame.\n\n  Args:\n    df: a pandas `DataFrame`.\n    prediction_file: Path to the prediction files (outputs of CMLE prediction\n      job).\n    model_col_name: Column name of the prediction values in df (added column).\n    prediction_name: Name of the column to retrieve from CMLE predictions.\n    example_key: key identifier of an example.\n    class_names: If the model is a multiclass model, you can specify class names.\n          The model will then return a logit value per class instead of a single value.\n  Returns:\n    df: a pandas `DataFrame` with an added column named 'model_col_name'\n      containing the prediction values.\n\n  Raises:\n    ValueError: dataframe and prediction file do not correspond exactly.\n      In particular, they must have same number of lines and same order.\n    ValueError: prediction file does not exist.\n\n  This function reads the prediction file and extracts the fields\n  'prediction_name'\n    and example_key. 
It orders the results based on example_key and then adds\n    them to df\n    in a new column called 'model_col_name'.\n  \"\"\"\n\n  prediction_file = os.path.join(prediction_file,\n                                 'prediction.results-00000-of-00001')\n  if not tf.gfile.Exists(prediction_file):\n    raise ValueError(\n        'Prediction file does not exist.'\n        ' You need to call prediction job and wait for completion.')\n\n  def _load_predictions(pred_file):\n    with file_io.FileIO(pred_file, 'r') as f:\n      # prediction file needs to fit in memory.\n      try:\n        predictions = [json.loads(line) for line in f]\n      except:\n        predictions = []\n    return predictions\n\n  predictions = _load_predictions(prediction_file)\n\n  if not predictions:\n    raise ValueError(\n        'The prediction file returned by CMLE is empty.'\n        ' It might be due to a badly formatted tfrecord input file that can not be'\n        ' parsed by CMLE (wrong input signature given by a `Model` instance).'\n        ' Check the logs of your CMLE job for further details.')\n  if example_key not in predictions[0]:\n    raise ValueError(\n        \"Predictions do not contain the 'example_key' field.\"\n        \" Verify that your 'example_key' parameter (set to {})\"\n        \" matches the CMLE model signature.\".format(example_key))\n  if prediction_name not in predictions[0]:\n    raise ValueError(\n        \"Predictions do not contain the 'prediction_name' field.\"\n        \" Verify that your 'prediction_name' parameter (set to {})\"\n        \" matches the CMLE model signature.\".format(prediction_name))\n  if len(predictions) != len(df):\n    raise ValueError('The dataframe and the prediction file do not contain'\n                     ' the same number of lines.')\n\n  predictions = sorted(predictions, key=lambda x: x[example_key])\n  if class_names is None:\n      prediction_proba = [x[prediction_name][0] for x in predictions]\n      df[model_col_name] = 
prediction_proba\n  else:\n      for i, class_name in enumerate(class_names):\n            df['{}_{}'.format(model_col_name,class_name)] = [x[prediction_name][i] for x in predictions]\n\n  return df\n"
  },
  {
    "path": "model_evaluation/utils_export/utils_cloudml_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for tf records utilities.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport pandas as pd\nimport unittest\n\nimport utils_cloudml\n\n\nclass CallModelPredictionsFromDf(unittest.TestCase):\n  \"\"\"Tests for `call_model_predictions_from_df`.\"\"\"\n\n  #TODO(fprost): Implement these.\n\n  def test_correct(self):\n    return\n\n\nclass CheckJobOver(unittest.TestCase):\n  \"\"\"Tests for `check_job_over`.\"\"\"\n\n  # TODO(fprost): Implement these.\n  def test_correct(self):\n    return\n\n\nclass AddModelPredictionsToDf(unittest.TestCase):\n  \"\"\"Tests for `add_model_predictions_to_df`.\"\"\"\n\n  def setUp(self):\n    self.COMMENT_KEY = 'comment_key'\n    self._df = pd.DataFrame({\n        self.COMMENT_KEY: [0, 1],\n        'other_field_1': ['I am a man', 'I am a woman'],\n        })\n    self._prediction_file = 'gs://kaggle-model-experiments/files_for_unittest/model1:v1'\n    self._model_col_name = 'model1:v1_preds'\n    self._prediction_name = 'toxicity/logistic'\n    self._example_key = self.COMMENT_KEY\n\n  def test_missing_prediction_file(self):\n    path = 'not_existing_folder/not_existing_file_path'\n\n    with self.assertRaises(Exception) as context:\n      
utils_cloudml.add_model_predictions_to_df(\n          self._df,\n          path,\n          self._model_col_name,\n          self._prediction_name,\n          self._example_key)\n      self.assertIn(\n          'Prediction file does not exist.',\n          str(context.exception))\n\n  def test_empty_prediction_file(self):\n    path = 'gs://kaggle-model-experiments/files_for_unittest/for_empty_predictions'\n\n    with self.assertRaises(Exception) as context:\n      utils_cloudml.add_model_predictions_to_df(\n          self._df,\n          path,\n          self._model_col_name,\n          self._prediction_name,\n          self._example_key)\n    self.assertIn(\n        'The prediction file returned by CMLE is empty.',\n        str(context.exception))\n\n  def test_missing_example_key(self):\n    example_key = 'not_found_example_key'\n    with self.assertRaises(Exception) as context:\n      utils_cloudml.add_model_predictions_to_df(\n          self._df,\n          self._prediction_file,\n          self._model_col_name,\n          self._prediction_name,\n          example_key,\n          )\n    self.assertIn(\n        \"Predictions do not contain the 'example_key' field.\",\n        str(context.exception))\n\n  def test_missing_prediction_key(self):\n    prediction_key = 'not_found_prediction_key'\n    with self.assertRaises(Exception) as context:\n      utils_cloudml.add_model_predictions_to_df(\n          self._df,\n          self._prediction_file,\n          self._model_col_name,\n          prediction_key,\n          self._example_key)\n    self.assertIn(\n        \"Predictions do not contain the 'prediction_name' field.\",\n        str(context.exception))\n\n  def test_correct(self):\n    output_df = utils_cloudml.add_model_predictions_to_df(\n        self._df,\n        self._prediction_file,\n        self._model_col_name,\n        self._prediction_name,\n        self._example_key)\n    right_output = pd.DataFrame({\n        self.COMMENT_KEY: [0, 1],\n        
'other_field_1': ['I am a man', 'I am a woman'],\n        self._model_col_name: [0.38753455877304077, 0.045782867819070816]\n        })\n    pd.testing.assert_frame_equal(\n        output_df.sort_index(axis=1), right_output.sort_index(axis=1))\n\n\nif __name__ == '__main__':\n  unittest.main()"
  },
  {
    "path": "model_evaluation/utils_export/utils_tfrecords.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Defines some utilities to use TF-Records with pandas DataFrame.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport pandas as pd\nimport random\nimport re\n\nimport tensorflow as tf\nfrom tensorflow.python.lib.io import file_io\nfrom tensorflow.python.platform import tf_logging as logging\n\n\ndef _bytes_feature(value):\n  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))\n\n\ndef _int64_feature(value):\n  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))\n\n\ndef _bytes_list_feature(value_list):\n  return tf.train.Feature(\n      bytes_list=tf.train.BytesList(\n          value=[tf.compat.as_bytes(value) for value in value_list]))\n\n\nclass EncodingFeatureSpec(object):\n\n  INTEGER = 'integer'\n  STRING = 'string'\n  LIST_STRING = 'list_string'\n\n  CONSTRUCTOR_PER_TYPE = {\n      INTEGER: _int64_feature,\n      STRING: _bytes_feature,\n      LIST_STRING: _bytes_list_feature\n  }\n\n\ndef is_valid_spec(spec):\n  \"\"\"Verifies that the spec matches requirements.\"\"\"\n  if not isinstance(spec, dict):\n    raise ValueError('Spec should be a dictionary instance.')\n  for (key, item) in spec.items():\n    if not 
isinstance(key, str):\n      raise ValueError(\n          'Spec is badly defined. Keys should be string (field names).')\n    if item not in EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE.keys():\n      raise ValueError(\n          'Spec is badly defined. Authorized types are one of {}.'.format(\n              EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE.keys()))\n\n\ndef encode_pandas_to_tfrecords(df,\n                               feature_keys_spec,\n                               tf_records_path,\n                               example_key=None):\n  \"\"\"Write a pandas `DataFrame` to a tf_record.\n\n  Args:\n    df: pandas `DataFrame`. It must include the fields that are part of\n      feature_key_spec.\n    feature_keys_spec: Dict of {name: type}, which describes the spec of the\n      TF-records.\n    tf_records_path: where to write the tf records.\n    example_key: key identifier of an example (string). This key will be added\n      to data automatically and should not be part of df. If none, no\n      example_key will be created.\n\n  Raises:\n    ValueError if feature_keys_spec does not follow a FeatureSpec format.\n\n  Note: TFRecords will have fields feature_keys_spec and\n  `example_key`(optional).\n  \"\"\"\n\n  is_valid_spec(feature_keys_spec)\n\n  writer = tf.python_io.TFRecordWriter(tf_records_path)\n  for i in range(len(df)):\n\n    if not i % 10000:\n      logging.info('Preparing train data: {}/{}'.format(i, len(df)))\n\n    # Create a feature\n    feature_dict = {}\n    for feature in feature_keys_spec:\n      constructor = EncodingFeatureSpec.CONSTRUCTOR_PER_TYPE[\n          feature_keys_spec[feature]]\n      feature_dict[feature] = constructor(df[feature].iloc[i])\n      if example_key:\n        feature_dict[example_key] = _int64_feature(i)\n    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))\n\n    # Serialize to string and write on the file\n    writer.write(example.SerializeToString())\n\n  writer.close()\n\n\ndef 
decode_tf_records_to_pandas(decoding_features_spec,\n                                tf_records_path,\n                                max_n_examples=None,\n                                random_filter_keep_rate=1.0,\n                                filter_fn=None):\n  \"\"\"Loads tf-records into a pandas dataframe.\n\n  Args:\n    decoding_features_spec: A dict mapping feature keys to FixedLenFeature\n      values. Spec of the tf-records.\n    tf_records_path: path to the file\n    max_n_examples: Maximum number of examples to extract.\n    random_filter_keep_rate: Probability for each line to be kept in training\n      data. For each line, we generate a random number x and keep it if x <\n      random_filter_keep_rate.\n    filter_fn (optional): Function applied to an example. If it returns False,\n      the example will be discarded.\n\n  Returns:\n    A pandas `DataFrame`.\n  \"\"\"\n\n  if not max_n_examples:\n    max_n_examples = float('inf')\n\n  reader = tf.TFRecordReader()\n  filenames = tf.train.match_filenames_once(tf_records_path)\n  filename_queue = tf.train.string_input_producer(filenames,\n                                                  num_epochs=1)\n\n  _, serialized_example = reader.read(filename_queue)\n  read_data = tf.parse_single_example(\n      serialized=serialized_example, features=decoding_features_spec)\n\n  sess = tf.InteractiveSession()\n  sess.run(tf.global_variables_initializer())\n  sess.run(tf.local_variables_initializer())\n  sess.run(tf.tables_initializer())\n  tf.train.start_queue_runners(sess)\n\n  d = []\n  new_line = sess.run(read_data)\n  count = 0\n  while new_line:\n    if filter_fn:\n      keep_line = filter_fn(new_line)\n    else:\n      keep_line = True\n    keep_line = keep_line and (random.random() < random_filter_keep_rate)\n\n    if keep_line:\n      d.append(new_line)\n      count += 1\n      if count >= max_n_examples:\n        break\n      if not (count % 100000):\n        logging.info('Loaded {} 
lines.'.format(count))\n\n    try:\n      new_line = sess.run(read_data)\n    except tf.errors.OutOfRangeError:\n      logging.info('End of file.')\n      break\n\n  res = pd.DataFrame(d)\n  return res\n"
  },
  {
    "path": "model_evaluation/utils_export/utils_tfrecords_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for tf records utilities.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport unittest\n\nimport pandas as pd\nimport tensorflow as tf\n\nimport utils_tfrecords\n\n\nclass TestEncodingAndDecoding(unittest.TestCase):\n  \"\"\"Test to encode and decode a pandas DataFrame\"\"\"\n\n  def testCorrect(self):\n    input_df = pd.DataFrame({\n        'x': [1, 2, 3],\n        'y': ['a', 'b', 'c'],\n        'z': [['a', 'b'], ['c', 'd'], ['e', 'f']]\n    })\n    encoding_feature_spec = {\n        'x': utils_tfrecords.EncodingFeatureSpec.INTEGER,\n        'y': utils_tfrecords.EncodingFeatureSpec.STRING,\n        'z': utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n    }\n    decoding_spec = {\n        'x': tf.FixedLenFeature([], dtype=tf.int64),\n        'y': tf.FixedLenFeature([], dtype=tf.string),\n        'z': tf.FixedLenFeature([2], dtype=tf.string),\n    }\n    tf_records_path = 'unittest.tf_records'\n    utils_tfrecords.encode_pandas_to_tfrecords(input_df, encoding_feature_spec,\n                                               tf_records_path)\n\n    output_df = utils_tfrecords.decode_tf_records_to_pandas(\n        decoding_spec, tf_records_path)\n    try:\n      
pd.testing.assert_frame_equal(input_df, output_df)\n    except ValueError:\n      self.fail('Dataset raised an exception unexpectedly!')\n\n\nclass TestFeatureKeySpec(unittest.TestCase):\n  \"\"\"Verifies the format of Feature Spec\"\"\"\n\n  def test_not_a_dictionary(self):\n    feature_keys_spec = 'not_a_dict',\n    with self.assertRaises(Exception) as context:\n      utils_tfrecords.is_valid_spec(feature_keys_spec)\n    self.assertIn('Spec should be a dictionary instance.',\n                  str(context.exception))\n\n  def test_not_in_possible(self):\n    feature_keys_spec = {'key': 'other_possibility'}\n    with self.assertRaises(Exception) as context:\n      utils_tfrecords.is_valid_spec(feature_keys_spec)\n    self.assertIn('Spec is badly defined. Authorized types are one of',\n                  str(context.exception))\n\n  def test_valid(self):\n    try:\n      feature_keys_spec = {\n          'comment_text': utils_tfrecords.EncodingFeatureSpec.LIST_STRING\n      }\n      utils_tfrecords.is_valid_spec(feature_keys_spec)\n    except ValueError:\n      self.fail('Dataset raised an exception unexpectedly!')\n\n\nif __name__ == '__main__':\n  unittest.main()\n"
  },
  {
    "path": "travis_blase_test_support/bazel_0.18.1-linux-x86_64.deb.sha256",
    "content": "4c2cd0a71ab1b65753aeb757af36bd6ebde9da4e53183525a1e1849c2542fdda  bazel_0.18.1-linux-x86_64.deb\n"
  }
]